全A股数据财务质量因子测试
此前在中证500成分股上测试财务质量因子时效果一般、收益率较低,
因此本测试改用全A股数据,探究在扩大样本范围的情况下,这些因子是否能带来超额收益。
1 财务质量因子的计算
#工具包、工具函数
#工具函数
import time
from datetime import datetime, timedelta
from jqdata import *
import numpy as np
import pandas as pd
import math
from statsmodels import regression
import statsmodels.api as sm
import matplotlib.pyplot as plt
import datetime
from scipy import stats
from jqfactor import *
import warnings
import seaborn as sns
# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')
# Set the plotting style
plt.style.use('ggplot')
# Given a start and end date, return every calendar day in between
def get_date_list(begin_date, end_date):
    """Return all calendar days from begin_date to end_date, inclusive.

    Args:
        begin_date: first day, a string parseable with "%Y-%m-%d".
        end_date: last day, a string parseable with "%Y-%m-%d".

    Returns:
        A list of zero-padded 'YYYY-MM-DD' strings; empty when begin_date
        is later than end_date.
    """
    # Imported locally under private names: at module level the name
    # `datetime` is rebound to the module by a later `import datetime`,
    # so `datetime.strptime` would fail there.
    from datetime import datetime as _datetime, timedelta as _timedelta

    current = _datetime.strptime(begin_date, "%Y-%m-%d")
    last = _datetime.strptime(end_date, "%Y-%m-%d")
    dates = []
    # Compare parsed dates, not strings: string comparison misorders
    # unpadded inputs such as '2018-6-1'.
    while current <= last:
        dates.append(current.strftime("%Y-%m-%d"))
        current += _timedelta(days=1)
    return dates
def ret_se(start_date='2018-6-1',end_date='2018-7-1',stock_pool=None,weight=0):
    """Return the per-day gross return (1 + pct change) of a stock basket.

    Args:
        start_date: first day of the price window passed to get_price.
        end_date: last day of the price window passed to get_price.
        stock_pool: iterable of security codes. None or an empty pool
            yields a constant series of 1 built on the '000001.XSHG'
            calendar.
        weight: 0 for an equal-weighted average; any other value weights
            each stock by the log of its circulating market cap.

    Returns:
        A Series indexed like the price frame; the first value is forced
        to 1.
    """
    pool = [] if stock_pool is None else list(stock_pool)
    fact_se = None
    if len(pool) != 0:
        # Historical close prices; stocks with any missing close are dropped
        df = get_price(pool,start_date=start_date,end_date=end_date,fields=['close']).close
        df = df.dropna(axis=1)
        if weight != 0:
            # Log circulating market cap of the remaining stocks.
            # NOTE(review): get_fundamentals is called without a date here,
            # so the weights presumably come from the latest data rather
            # than from the window itself - confirm this is intended.
            df_mkt = get_fundamentals(query(valuation.code,valuation.circulating_market_cap).filter(valuation.code.in_(df.columns)))
            fact_se = np.log(pd.Series(df_mkt['circulating_market_cap'].values,index=df_mkt['code'].values))
    else:
        # Empty pool: use the index calendar with a constant value column
        df = get_price('000001.XSHG',start_date=start_date,end_date=end_date,fields=['close'])
        df['v'] = [1]*len(df)
        del df['close']
    # Gross return relative to the previous row
    pct = df.pct_change()+1
    pct.iloc[0,:] = 1
    if weight == 0 or fact_se is None:
        # Equal-weighted average (also the fallback when no weights exist,
        # i.e. for an empty pool)
        return pct.cumsum(axis=1).iloc[:,-1]/pct.shape[1]
    # Weighted average
    return (pct*fact_se).cumsum(axis=1).iloc[:,-1]/sum(fact_se)
# Collect the return series of every group for every holding period
def get_all_pct(pool_dict,trade_list,groups=5):
    """Stack the per-group return series of each consecutive date pair.

    Args:
        pool_dict: mapping from a start date in trade_list to the ordered
            stock list used for that period.
        trade_list: ordered sequence of dates; each adjacent pair (s, e)
            is one holding period.
        groups: number of equally sized slices the stock list is cut into.
            Stocks beyond groups * (len // groups) are left out.

    Returns:
        A DataFrame with one column per group (0 .. groups-1) and the
        rows of all periods concatenated; an empty frame with those
        columns when trade_list has fewer than two dates.
    """
    period_frames = []
    for s,e in zip(trade_list[:-1],trade_list[1:]):
        stock_list = pool_dict[s]
        stock_num = len(stock_list)//groups
        group_series = [
            ret_se(start_date=s,end_date=e,stock_pool=stock_list[i*stock_num:(i+1)*stock_num])
            for i in range(groups)
        ]
        period_df = pd.concat(group_series,axis=1)
        period_df.columns = range(groups)
        period_frames.append(period_df)
    if not period_frames:
        return pd.DataFrame(columns=range(groups))
    # Concatenate once at the end instead of growing a frame inside the loop
    return pd.concat(period_frames,axis=0)
def tradedays_before(date,count):
    """Return the first index label of the last count+1 bars of '000001.XSHG' ending at `date`.

    Presumably this is the trading day `count` sessions before `date`.
    NOTE: the same function is defined a second time further down the file.
    """
    window = get_price('000001.XSHG',end_date=date,count=count+1)
    return window.index[0]
def ShiftTradingDay(date,shift):
    """Return the trading day `shift` positions away from `date`.

    `date` is converted with str() and must start with 'YYYY-MM-DD'; it
    has to be present in the sequence returned by get_all_trade_days(),
    otherwise list.index raises ValueError.
    """
    # Full trading calendar
    calendar = get_all_trade_days()
    # Rebuild a datetime.date from the leading 'YYYY-MM-DD' characters
    text = str(date)
    anchor = datetime.date(int(text[:4]),int(text[5:7]),int(text[8:10]))
    # Position of the anchor day in the calendar, moved by `shift`
    position = list(calendar).index(anchor)+shift
    return calendar[position]
# Filter out ST stocks, paused stocks and recent listings; return the remaining codes
def filter_stock(stockList,date,days=21*3,limit=0,skip_paused=1):
    """Apply the ST / paused / recently-listed / limit-open filters in turn.

    Args:
        stockList: candidate security codes.
        date: evaluation day as a 'YYYY-MM-DD' string.
        days: a stock is kept only if it listed more than this many
            calendar days before `date` (the value is fed to timedelta).
        limit: 1 to also drop stocks whose open equals the upper or lower
            price limit on `date`.
        skip_paused: 1 to drop stocks flagged as paused on `date`.

    Returns:
        A list of the codes that survive every enabled filter.
    """
    # Drop ST stocks
    st_flags = get_extras('is_st',stockList, count = 1,end_date=date)
    stockList = [code for code in stockList if not st_flags[code][0]]
    # Drop stocks paused on the day
    if skip_paused == 1:
        paused = get_price(stockList,end_date=date,count=1,fields=['paused'])['paused'].T
        paused.columns = ['paused']
        paused = paused[paused['paused']==0]
        stockList = paused.index
    # Drop stocks listed too recently
    reference = datetime.datetime.strptime(date, "%Y-%m-%d")
    cutoff = (reference-datetime.timedelta(days=days)).date()
    stockList = [code for code in stockList if get_security_info(code).start_date<cutoff]
    # Drop stocks opening at a price limit (switch the field to test the close instead)
    if limit == 1:
        quotes = get_price(stockList,end_date=date,fields=['open','high_limit','low_limit'],count=1).iloc[:,0,:]
        quotes['h_limit']=(quotes['open']==quotes['high_limit'])
        quotes['l_limit']=(quotes['open']==quotes['low_limit'])
        stockList = [
            code
            for code,at_upper,at_lower in zip(quotes.index,quotes['h_limit'],quotes['l_limit'])
            if not (at_upper or at_lower)
        ]
    return stockList
# Drop stocks sitting at a price limit at a given time of day
def filter_limit_stock(stocks_list,end_date,set_time=' 10:00:00'):
    """Return the codes whose 1-minute open is not at a price limit.

    Args:
        stocks_list: candidate security codes.
        end_date: day to test; only the first 10 characters of
            str(end_date) are used.
        set_time: time-of-day suffix (with a leading space) appended to
            the day. Previously a hard-coded assignment inside the
            function overwrote this argument, so it was ignored.

    Returns:
        A list of codes whose open differs from both high_limit and
        low_limit on the selected 1-minute bar.
    """
    # Switch the field to test the close instead of the open
    df = get_price(stocks_list,end_date=str(end_date)[:10]+set_time,frequency='1m',fields=['open','high_limit','low_limit'],count=1,panel=False)
    df['h_limit']=(df['open']==df['high_limit'])
    df['l_limit']=(df['open']==df['low_limit'])
    df.index = df['code'].values
    return [
        code
        for code,at_upper,at_lower in zip(df.index,df['h_limit'],df['l_limit'])
        if (not at_upper) and (not at_lower)
    ]
def get_risk_index(se, periods_per_year=250/21, risk_free=0.025):
    """Summarise a series of per-period gross returns.

    Args:
        se: Series of gross returns per period (1.02 means +2%).
        periods_per_year: number of periods in a year, used to annualise.
            The default 250/21 matches the previously hard-coded constant.
        risk_free: annual risk-free rate subtracted in the Sharpe ratio.

    Returns:
        A tuple (total return %, annualised return %, Sharpe ratio,
        max drawdown %, mean per-period return %). The first four values
        are rounded to 2 decimals.
    """
    return_se = se.cumprod()-1
    # Positional access: a plain [-1] is a label lookup on integer indexes
    total_returns = return_se.iloc[-1]
    total_an_returns = ((1+total_returns)**(periods_per_year/len(return_se))-1)
    with np.errstate(divide='ignore', invalid='ignore'):
        # A zero standard deviation gives inf/nan rather than an exception
        sharpe = (total_an_returns-risk_free)/(np.std(se)*np.sqrt(periods_per_year))
    returns_mean = round(se.mean()-1,6)*100

    # Max drawdown: for every split point, compare the highest equity
    # before it with the lowest equity from it onwards.
    equity = (return_se.dropna()+1).values
    if len(equity) > 1:
        prior_high = np.maximum.accumulate(equity)[:-1]
        later_low = np.minimum.accumulate(equity[::-1])[::-1][1:]
        with np.errstate(divide='ignore', invalid='ignore'):
            drawdowns = np.where(prior_high > later_low,
                                 (prior_high-later_low)/prior_high,
                                 0.0)
        max_drawdown = drawdowns.max()
    else:
        # Fewer than two points: no drawdown can be measured
        max_drawdown = 0.0

    total_returns = round(total_returns*100,2)
    total_an_returns = round(total_an_returns*100,2)
    sharpe = round(sharpe,2)
    max_drawdown = round(max_drawdown*100,2)
    return total_returns,total_an_returns,sharpe,max_drawdown,returns_mean
def Get_dayLastYear(date):
    """Return the same calendar day one year earlier as 'YYYY-MM-DD'.

    Args:
        date: a 'YYYY-MM-DD' string.

    Returns:
        The date one year before; Feb 29 maps to Feb 28 of the previous
        year.
    """
    day = datetime.datetime.strptime(date, "%Y-%m-%d")
    try:
        shifted = day.replace(year=day.year-1)
    except ValueError:
        # Feb 29 has no counterpart in a non-leap year: clamp to Feb 28
        shifted = day.replace(year=day.year-1, day=28)
    return shifted.strftime("%Y-%m-%d")
# Build a list of trading dates
def get_tradeday_list(start,end,frequency=None,count=None):
    """Return the index of '000001.XSHG' bars, optionally thinned out.

    Args:
        start: window start, used only when `count` is None.
        end: window end.
        frequency: None or 'day' keeps every bar; 'month' keeps the first
            bar of each year-month; 'quarter' does the same restricted to
            months 01/04/07/10; 'halfyear' restricted to months 01/06;
            any other value is used as an integer step N (every N-th bar,
            starting with the first).
        count: when given, the last `count` bars ending at `end` are used
            instead of the start/end window.

    Returns:
        The index of the selected bars.
    """
    if count is None:
        bars = get_price('000001.XSHG',start_date=start,end_date=end)
    else:
        bars = get_price('000001.XSHG',end_date=end,count=count)
    if frequency is None or frequency == 'day':
        return bars.index
    bars['year-month'] = [str(stamp)[0:7] for stamp in bars.index]
    if frequency == 'month':
        return bars.drop_duplicates('year-month').index
    months_kept = {'quarter': ['01','04','07','10'], 'halfyear': ['01','06']}
    if frequency in months_kept:
        bars['month'] = [str(stamp)[5:7] for stamp in bars.index]
        bars = bars[bars['month'].isin(months_kept[frequency])]
        return bars.drop_duplicates('year-month').index
    # Keep one bar out of every `frequency`
    positions = [pos for pos in range(len(bars.index)) if pos%frequency == 0]
    return bars.iloc[positions,:].index
def tradedays_before(date,count):
    """Return the first index label of the last count+1 bars of '000001.XSHG' ending at `date`.

    Presumably this is the trading day `count` sessions before `date`.
    NOTE: this repeats an identical definition that appears earlier in the file.
    """
    bars = get_price('000001.XSHG',end_date=date,count=count+1)
    earliest = bars.index[0]
    return earliest
# Drop stocks listed fewer than n calendar days before beginDate (default 90, about 3 months)
def delect_stop(stocks,beginDate,n=30*3):
    """Return the stocks whose listing date is more than `n` days before `beginDate`.

    Args:
        stocks: iterable of security codes.
        beginDate: reference day as a 'YYYY-MM-DD' string.
        n: minimum listing age in calendar days.
    """
    reference = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    cutoff = (reference-datetime.timedelta(days = n)).date()
    return [code for code in stocks if get_security_info(code).start_date < cutoff]
# Build the stock universe for a given day
def get_stock(begin_date,pool):
    """Return the filtered constituents of `pool` on `begin_date`.

    Args:
        begin_date: evaluation day; only the leading 'YYYY-MM-DD' part of
            str(begin_date) is used, so timestamp-like inputs also work
            with the strptime call inside delect_stop.
        pool: "A" for the union of '000002.XSHG' and '399107.XSHE', or an
            index code; when the index lookup fails the value is retried
            as an industry code.

    Returns:
        A list of codes with ST stocks and recent listings removed.
    """
    begin_date = str(begin_date)[:10]
    if pool == "A":
        stockList = get_index_stocks('000002.XSHG',begin_date)+get_index_stocks('399107.XSHE',begin_date)
    else:
        try:
            stockList = get_index_stocks(pool,begin_date)
        except Exception:
            # Not an index code: fall back to an industry lookup
            stockList = get_industry_stocks(pool,begin_date)
    # Drop ST stocks
    try:
        st_data = get_extras('is_st', stockList, count = 1, end_date=begin_date)
        stockList = [stock for stock in stockList if not st_data[stock][0]]
    except Exception:
        # Best-effort retry without the last code.
        # NOTE(review): presumably a workaround for a lookup that fails on
        # the final constituent - confirm why it is needed.
        temp = stockList[:-1]
        st_data = get_extras('is_st', temp, count = 1, end_date=begin_date)
        stockList = [stock for stock in temp if not st_data[stock][0]]
    # Drop recently listed stocks
    stockList = delect_stop(stockList, begin_date)
    return stockList
# Compute all financial-quality factors
def _quality_factor_with_yoy(pool, factor, date, date_last_year):
    """Fetch one library factor at `date` and a year earlier.

    Returns a DataFrame with the columns `factor` (current value) and
    `factor + "_changeRate"` (relative change versus a year earlier).
    """
    now = get_factor_values(securities=pool, factors=[factor],end_date=date,count =1)[factor]
    before = get_factor_values(securities=pool, factors=[factor],end_date=date_last_year,count =1)[factor]
    out = pd.DataFrame()
    out[factor] = now.iloc[0,:]
    out[factor+"_changeRate"] = ((now.iloc[0,:] -before.iloc[0:])/before.iloc[0:] ).iloc[0,:]
    return out


def cal_quality(pool,date):
    """Build the financial-quality factor table for `pool` as of `date`.

    Args:
        pool: list of security codes.
        date: evaluation day as a 'YYYY-MM-DD' string.

    Returns:
        A DataFrame with one row per stock and 28 factor columns. The
        column order is significant: the driver script zips the columns
        with a list of display names. Missing values are filled with the
        column mean.
    """
    date_last_year = Get_dayLastYear(date)
    factor_df = pd.DataFrame()
    q = query(cash_flow.subtotal_operate_cash_inflow,
              income.net_profit,
              cash_flow.net_operate_cash_flow,
              balance.account_receivable,
              balance.accounts_payable,
              balance.inventories,
              balance.good_will,
              balance.equities_parent_company_owners,
              balance.deferred_tax_assets,
              indicator.operating_expense_to_total_revenue,
              income.income_tax_expense,
              balance.cash_equivalents,
              balance.total_assets,
              balance.total_current_liability,
              balance.total_liability,
              cash_flow.goods_and_services_cash_paid,
              cash_flow.goods_sale_and_service_render_cash,
              income.operating_cost,
              income.operating_revenue,income.code).filter(valuation.code.in_(pool) )
    # Load the current and the year-earlier fundamentals
    df_now = get_fundamentals(q, date)
    df_before = get_fundamentals(q,date_last_year)
    # Index by code and keep only stocks present in both snapshots
    df_now.set_index(["code"],inplace = True)
    df_before.set_index(["code"],inplace = True)
    # Sorted so the row order does not depend on set iteration order
    codes = sorted(set(df_now.index)&set(df_before.index))
    df_now = df_now.loc[codes,:]
    df_before = df_before.loc[codes,:]
    df=df_now

    # 1 operating cost / operating revenue, and its year-over-year change
    factor = "cost_over_rev"
    df_now[factor] = df_now["operating_cost"]/df_now["operating_revenue"]
    df_before[factor] = df_before["operating_cost"]/df_before["operating_revenue"]
    factor_df[factor] = df_now[factor]
    factor_df[factor+"_ChangeRate"] = (df_now[factor]-df_before[factor])/df_before[factor]

    # 2 inventory turnover, 3 receivables turnover, 4 total-asset turnover,
    # each with its year-over-year change
    for factor in ("inventory_turnover_rate",
                   "account_receivable_turnover_rate",
                   "total_asset_turnover_rate"):
        temp_factor_df = _quality_factor_with_yoy(pool, factor, date, date_last_year)
        factor_df = pd.merge(factor_df,temp_factor_df,on = "code")

    # 5 cash received from sales of goods and services / operating revenue
    factor_df["cash_over_rev"] = df["goods_sale_and_service_render_cash"]/df["operating_revenue"]
    # 6 cash paid for goods and services / operating cost
    # (the denominator used to be operating revenue, contradicting the
    # factor's definition and name)
    factor_df["cash_over_cost"] = df["goods_and_services_cash_paid"]/df["operating_cost"]
    # 7 current liabilities / total liabilities
    factor_df["current_over_liability"] = df["total_current_liability"]/df["total_liability"]
    # 8 cash and equivalents / total assets
    factor_df["money_over_asset"] = df["cash_equivalents"]/df["total_assets"]
    # 9 income tax expense / operating revenue, and its year-over-year change
    factor = "income_tax_expense_over_rev"
    df_now[factor] = df_now['income_tax_expense']/df_now["operating_revenue"]
    df_before[factor] = df_before['income_tax_expense']/df_before["operating_revenue"]
    factor_df[factor] = df_now[factor]
    factor_df[factor+"_ChangeRate"] = (df_now[factor]-df_before[factor])/df_before[factor]
    # 10 accounts receivable / operating revenue
    factor_df["account_receivable"] = df["account_receivable"]/df["operating_revenue"]
    # 11 operating expense / total revenue
    factor_df["operating_expense_to_total_revenue"] = df.operating_expense_to_total_revenue
    # 12 deferred tax assets
    factor_df["deferred_tax_assets"] = df.deferred_tax_assets
    # 13 goodwill / equity attributable to parent-company owners
    factor_df["goodWill_over_netAssets"] = df.good_will/df.equities_parent_company_owners
    # 14 accounts payable, year-over-year growth
    factor_df["accounts_payable.rate"] = (df_now["accounts_payable"]- df_before["accounts_payable"])/df_before["accounts_payable"]
    # 15 accounts receivable, year-over-year growth
    factor_df["account_receivable.rate"] = (df_now["account_receivable"]- df_before["account_receivable"])/df_before["account_receivable"]
    # 16 net operating cash flow, year-over-year growth
    factor_df["net_operate_cash_flow.rate"] = (df_now["net_operate_cash_flow"]- df_before["net_operate_cash_flow"])/df_before["net_operate_cash_flow"]
    # 17 net profit, year-over-year growth
    factor_df["net_profit.rate"] = (df_now["net_profit"]- df_before["net_profit"])/df_before["net_profit"]
    # 18 operating cash flow growth / net profit growth
    factor_df["cash_flow_over_profit.rate"] = factor_df["net_operate_cash_flow.rate"]/factor_df["net_profit.rate"]
    # 19 operating revenue, year-over-year growth
    factor_df["operating_revenue.rate"] = (df_now["operating_revenue"]- df_before["operating_revenue"])/df_before["operating_revenue"]
    # 20 accounts receivable growth / operating revenue growth
    factor_df["account_receivable_over_revenue.rate"] = factor_df["account_receivable.rate"]/factor_df["operating_revenue.rate"]
    # 21 operating cash inflow, year-over-year growth
    factor_df["subtotal_operate_cash_inflow.rate"] = (df_now["subtotal_operate_cash_inflow"]- df_before["subtotal_operate_cash_inflow"])/df_before["subtotal_operate_cash_inflow"]
    # 22 operating cash inflow growth / net profit growth
    factor_df["cash_inflow_over_profit.rate"] = factor_df["subtotal_operate_cash_inflow.rate"]/factor_df["net_profit.rate"]
    # 23 accounts payable growth / operating revenue growth
    factor_df["accounts_payable_over_rev"]= factor_df["accounts_payable.rate"]/factor_df["operating_revenue.rate"]

    # Fill gaps with the cross-sectional mean of each factor.
    # NOTE(review): ratios with a zero denominator can be +/-inf, which
    # would propagate into the mean - consider mapping inf to NaN first.
    for i in factor_df.columns:
        factor_df[i] = factor_df[i].fillna(factor_df[i].mean())
    return factor_df
# Quick check of the factor computation on two stocks
pool = ['000001.XSHE','600000.XSHG']
date = '2015-10-01'
cal_quality(pool,date)
# Statistics window
date_start = '2010-01-01'
date_end = '2018-12-31'
# Rebalancing frequency: 'month', 'day', or a number of trading days
trade_freq = 'day'
# Benchmark / stock universe
index = 'A'
# Build the list of rebalancing dates
if trade_freq == 'month':
    # First trading day of every calendar month
    date_list = get_tradeday_list(start=date_start,end=date_end,frequency='month',count=None)
elif trade_freq == 'day':
    # Every trading day in the window
    date_list = get_tradeday_list(start=date_start,end=date_end,count=None)
else:
    # Every N-th trading day in the window
    date_day_list = get_tradeday_list(start=date_start,end=date_end,count=None)
    date_list = [date_day_list[i] for i in range(len(date_day_list)) if i%int(trade_freq) == 0]
date_list[:3],date_list[-3:]

# Compute the raw factor table for every rebalancing date
factor_dict = {}
for end_date in date_list:
    end_date=str(end_date)[:10]
    print('正在计算 {} 因子数据......'.format(end_date))
    stocks_list = get_stock(end_date,index)
    pool = stocks_list
    factor_dict[end_date] = cal_quality(pool,end_date)
factor_dict[end_date].head(3)

# Persist the computed factors; the context manager closes the file even
# when pickling fails
import pickle
with open('../factor_doc/quality_A_20181231_factor.pkl', 'wb') as pkl_file:
    pickle.dump(factor_dict, pkl_file, 0)

# Reload the stored factors (to skip the computation above, run only this
# part). NOTE: pickle.load must only be used on files this script wrote.
with open('../factor_doc/quality_A_20181231_factor.pkl', 'rb') as pkl_file:
    factor_dict = pickle.load(pkl_file)
d = str(date_list[4])[:10]
factor_dict[d].head(3)
2 IC、IR值
def factor_IC_analysis(factorData, Field, begin_date, end_date, period):
    """Compute the per-period IC and rank IC of one factor.

    Args:
        factorData: mapping from 'YYYY-MM-DD' to a factor DataFrame
            indexed by stock code.
        Field: name of the factor column to analyse.
        begin_date: start of the analysis window.
        end_date: end of the analysis window.
        period: frequency argument forwarded to get_tradeday_list.

    Returns:
        A tuple (IC_norm, IC_rank) of Series keyed by period start date:
        the Pearson correlation between the standardised factor and the
        next-period return, and the same correlation computed on ranks.
    """
    dateList = list(get_tradeday_list(begin_date,end_date,frequency=period))
    IC_norm = {}
    IC_rank = {}
    for date, next_date in zip(dateList[:-1], dateList[1:]):
        day_key = str(date)[:10]
        # Stock universe of the period
        stockList = list(factorData[day_key].index)
        # Cross-sectional return over the period
        df_close=get_price(stockList, date, next_date, 'daily', ['close'])
        if df_close.empty:
            continue
        closes = df_close['close']
        # A fresh frame per period: reusing one frame across iterations
        # aligned every later period to the already-filtered index, so the
        # sample kept shrinking.
        R_T = pd.DataFrame({'pchg': closes.iloc[-1,:]/closes.iloc[0,:]-1})
        # Standardised factor values
        factor_data = standardlize(factorData[day_key][Field], inf2nan=True, axis=0)
        R_T['factor'] = factor_data
        R_T = R_T.dropna()
        if len(R_T) < 2:
            # pearsonr needs at least two observations
            continue
        IC_norm[date]=stats.pearsonr(R_T.pchg, R_T['factor'])[0]
        IC_rank[date]=stats.pearsonr(R_T.pchg.rank(), R_T['factor'].rank())[0]
    IC_norm = pd.Series(IC_norm).dropna()
    IC_rank = pd.Series(IC_rank).dropna()
    return (IC_norm, IC_rank)
# Factor IC analysis
begin_date = '2010-01-01'
end_date ='2018-12-16'
period =21
IndexList = factor_dict[str(date_list[0])[:10]].columns
ColumnsList = ["IC", "IR", "RankIC", "RankIR"]
IC = pd.DataFrame(index = IndexList, columns = ColumnsList)
for Field in IndexList:
    tempIC = factor_IC_analysis(factor_dict, Field, begin_date, end_date, period)
    ic_series, rank_ic_series = tempIC[0], tempIC[1]
    # Store mean IC and IR (mean / standard deviation) for both variants
    IC.loc[Field , "IC"] = ic_series.mean()
    IC.loc[Field , "IR"] = ic_series.mean() / ic_series.std()
    IC.loc[Field, "RankIC"] = rank_ic_series.mean()
    IC.loc[Field, "RankIR"] = rank_ic_series.mean() / rank_ic_series.std()
# Heat map of the absolute values
IC = IC.astype(float)
fig = plt.figure(figsize=(15,6))
ax = fig.add_subplot(111)
sns.heatmap(IC.abs(), annot=True, vmax=1, vmin = 0)
ax.set_title("IC / IR", fontsize=21)
fig.show()
3 收益率回测
其中表现较好的因子及其年化收益如下:
营业成本/营业收入 反转因子 年化收益 7.69%
(营业收入/营业成本)同比 反转因子 年化收益 6.38%
存货周转率 反转因子 年化收益可达10.44%
总资产周转率 反转因子 年化收益 7.93%
总资产周转率同比 反转因子 年化收益 7.71%
购买商品、接受劳务支付的现金/营业成本 动量因子 年化收益 8.22%
流动负债/总负债 动量因子 年化收益 8.00%
货币资金/总资产 动量因子 年化收益 7.00%
所得税费用比营业收入 动量 年化收益 7.28%
所得税费用比营业收入同比 动量 年化收益6.11%
应付账款同比增长率 反转 年化收益 7.24%
应收账款同比增长率 动量 年化收益6.44%
净利润同比增长率 动量 年化收益8.62%
经营现金流入同比增长率 反转 年化收益 6.50%
# Backtest function (supports different rebalancing frequencies and pricing modes)
def _bt_price_plan(pct_adj, d1, d1_next, d2, d2_next, set_time):
    """Return (start, stop, field, extra get_price kwargs) for pricing modes 1, 2, 3 and 5."""
    if pct_adj == 1:
        # factor on T, trade at the open of T+1, open to open
        return d1_next, d2_next, 'open', {}
    if pct_adj == 2:
        # close of T to close of the next rebalancing day
        return d1, d2, 'close', {}
    if pct_adj == 3:
        # 1-minute close at set_time on T
        return str(d1)[:10]+set_time, str(d2)[:10]+set_time, 'close', {'frequency': '1m'}
    # pct_adj == 5: 1-minute close at set_time on T+1
    return str(d1_next)[:10]+set_time, str(d2_next)[:10]+set_time, 'close', {'frequency': '1m'}


def _bt_basket_return(codes, start, stop, start_field, stop_field, extra):
    """Equal-weighted gross return of `codes` between two single bars."""
    entry = get_price(list(codes),end_date=start,count=1,fields=[start_field],**extra)[start_field]
    # Stocks without an entry price are dropped
    entry = entry.dropna(axis=1)
    leave = get_price(list(entry.columns),end_date=stop,count=1,fields=[stop_field],**extra)[stop_field]
    return (leave.values/entry.values).mean()


def _bt_index_return(index, start, stop, field, extra):
    """Gross return of the benchmark between two single bars."""
    first = get_price(index,end_date=start,count=1,fields=[field],**extra)[field]
    last = get_price(index,end_date=stop,count=1,fields=[field],**extra)[field]
    return last.values[-1]/first.values[-1]


def _bt_drop_near_limit_up(ranked, when, sort_key):
    """Drop stocks whose close is above 99% of the limit-up price at `when`.

    NOTE(review): this lookup does not pass frequency='1m' although `when`
    carries a time of day - confirm the intended bar size.
    """
    df_limit = get_price(list(ranked.index),end_date=when,count=1,fields=['close','high_limit'])
    df_limit = (df_limit['close']/df_limit['high_limit']).T
    df_limit.columns = ['limit']
    merged = pd.concat([df_limit,ranked],axis=1)
    merged = merged[merged['limit'] <= 0.99]
    return merged.sort_values(sort_key,ascending=True)


def backtest(factor_df,period =20,rev=0,fe=0.002,hold_num=50,pct_adj=1,index='000905.XSHG',set_time=' 10:30:00',show=1,trade_record=0,start_date="2011-08-02"):
    """Run a top/bottom-N factor backtest and print a performance summary.

    Args:
        factor_df: DataFrame with 'YYYY-MM-DD' rows and stock-code
            columns holding the factor values.
        period: rebalancing frequency forwarded to get_tradeday_list.
        rev: 0 holds the stocks with the lowest factor values, anything
            else the highest.
        fe: cost rate charged on the turned-over fraction at each rebalance.
        hold_num: number of stocks held.
        pct_adj: pricing mode. 1: T+1 open to open; 2: T close to close;
            3: set_time on T; 5: set_time on T+1; 4: overlapping model.
        index: benchmark code.
        set_time: time-of-day suffix used by modes 3 and 5.
        show: 1 to plot the cumulative curves.
        trade_record: 1 to save the holdings to 'trade_record_goal.csv'.
        start_date: first day of the backtest calendar (defaults to the
            previously hard-coded "2011-08-02").

    Raises:
        ValueError: for an unsupported pct_adj, or when no holding period
            could be evaluated.
    """
    if pct_adj not in (1,2,3,4,5):
        raise ValueError('unsupported pct_adj: {!r}'.format(pct_adj))
    print('回测设置:0反转1动量:{} 调仓时间:{} 成本:{} 持仓:{} 调仓方式:{}'.format(rev,period,fe,hold_num,pct_adj))
    if pct_adj == 3:
        print('调仓时间:T {} —— T+1 {}'.format(set_time,set_time))
    elif pct_adj == 1:
        print('调仓时间:T+1 {} —— T+2 {}'.format('open','open'))
    elif pct_adj == 5:
        print('调仓时间:T+1 {} —— T+2 {}'.format(set_time,set_time))
    backtest_list = get_tradeday_list(start=start_date,end=factor_df.index[-1],frequency=period)
    trade_record_df = pd.DataFrame()
    # Running state
    labels, rets, index_rets = [], [], []
    pool_temp = None
    pool_temp_bf = []
    pool_new = None
    tur = 0
    tur_temp = 0
    tur_list = []
    trade = 0
    mark = 0
    year = str(backtest_list[0])[:4]
    for d1,d2 in zip(backtest_list[:-1],backtest_list[1:]):
        d1_ = ShiftTradingDay(d1,1)  # one trading day later
        d2_ = ShiftTradingDay(d2,1)
        d1 = str(d1)[:10]
        d2 = str(d2)[:10]
        # Refresh the holdings on days that have factor values
        if d1 in factor_df.index:
            trade = 1
            # Ascending sort: lowest factor values first
            df_temp = factor_df.loc[d1,:].sort_values(ascending=True)
            df_temp = df_temp.dropna()
            # Modes 3 and 5 exclude stocks close to limit-up at trade time
            if pct_adj == 3:
                df_temp = _bt_drop_near_limit_up(df_temp, d1+set_time, d1)
            elif pct_adj == 5:
                df_temp = _bt_drop_near_limit_up(df_temp, str(d1_)[:10]+set_time, d1)
            if rev == 0:
                pool_temp = df_temp.index[:hold_num]
            else:
                pool_temp = df_temp.index[-hold_num:]
            if trade_record == 1:
                if len(pool_temp) != hold_num:
                    print('{}选股{} 不够{}只'.format(d1,len(pool_temp),hold_num))
                elif pct_adj == 3:
                    trade_record_df[str(d1)[:10]] = pool_temp  # traded on T
                else:
                    trade_record_df[str(d1_)[:10]] = pool_temp  # traded on T+1
            # Turnover: share of the new holdings not held before
            held_before = set(pool_temp_bf)
            if len(pool_temp):
                tur_temp = len([stock for stock in pool_temp if stock not in held_before])/len(pool_temp)
            else:
                tur_temp = 0.0
            tur_list.append(tur_temp)
            if str(d1)[:4] != year:
                print('{} 年持仓交易换手率为: {}'.format(year,round(tur,2)))
                tur = 0
                year = str(d1)[:4]
            # Count the rebalance in the year it belongs to (the first
            # rebalance of a new year used to be dropped)
            tur += tur_temp
            pool_temp_bf = pool_temp
        elif pool_temp is None:
            # Nothing held yet and no factor values for this day
            continue

        if pct_adj == 4:  # overlapping model
            if mark == 0:  # first period
                pool_new = pool_temp
                # Intraday return (open to close on T+1) of the new holdings
                ret1 = _bt_basket_return(pool_new, str(d1_)[:10], str(d1_)[:10], 'open', 'close', {})
                ret = (0.5+ret1/2)
                mark = 1
            else:
                pool_old = pool_new
                pool_new = pool_temp
                # Intraday return of new and old holdings together
                ret1 = _bt_basket_return(list(pool_new)+list(pool_old), str(d1_)[:10], str(d1_)[:10], 'open', 'close', {})
                # Overnight return of the old holdings (close of T to open of the next date)
                ret2 = _bt_basket_return(pool_old, str(d1)[:10], str(d2)[:10], 'close', 'open', {})
                ret = ret1*(0.5+ret2/2)
            # Half of the usual cost is charged in this mode
            if trade == 1:
                ret = ret*(1-0.5*tur_temp*fe)
                trade = 0
            index_ret = _bt_index_return(index, str(d1)[:10], str(d2)[:10], 'close', {})
        else:
            start, stop, field, extra = _bt_price_plan(pct_adj, d1, d1_, d2, d2_, set_time)
            ret = _bt_basket_return(pool_temp, start, stop, field, field, extra)
            # Charge the trading cost on rebalancing periods
            if trade == 1:
                ret = ret*(1-tur_temp*fe)
                trade = 0
            index_ret = _bt_index_return(index, start, stop, field, extra)
        labels.append(d1)
        rets.append(ret)
        index_rets.append(index_ret)

    if not labels:
        raise ValueError('no holding period could be evaluated')
    if tur_list:
        # Report the final (possibly partial) year as well
        print('{} 年持仓交易换手率为: {}'.format(year,round(tur,2)))
    return_all_df = pd.DataFrame({'ret': rets,
                                  'alpha': [r-i+1 for r,i in zip(rets,index_rets)],
                                  'index': index_rets},
                                 index=labels,
                                 columns=['ret','alpha','index'])
    # Save the holdings of every rebalance
    if trade_record== 1:
        trade_record_df.to_csv('trade_record_goal.csv')
    summary = pd.DataFrame(index=['总收益','年化收益','夏普率','最大回撤','每日收益%'])
    summary['ret'] = get_risk_index(return_all_df['ret'])
    summary['alpha']=get_risk_index(return_all_df['alpha'])
    summary['index']=get_risk_index(return_all_df['index'])
    summary = summary.T
    print('策略每次调仓平均交易换手率为:{}'.format(round(np.mean(tur_list),3)))
    print('=策略运行时间:{} 至 {}'.format(str(return_all_df.index[0])[:10],str(return_all_df.index[-1])[:10]))
    if show == 1:
        (return_all_df).cumprod().plot(figsize=(15,6))
        plt.show()
    print(summary)
# Display names of the factors, in the column order produced by cal_quality
factor_name = ["营业成本/营业收入","(营业收入/营业成本)同比","存货周转率","存货周转率同比",
               "应收账款周转率","应收账款周转率同比","总资产周转率","总资产周转率同比",
               "销售商品、提供劳务收到的现金/营业收入","购买商品、接受劳务支付的现金/营业成本",
               "流动负债/总负债","货币资金/总资产","所得税费用比营业收入", "所得税费用比营业收入同比",
               "应收账款除以营业收入","营业费用除以营业收入","递延所得税资产","商誉占净资产比率",
               "应付账款同比增长率","应收账款同比增长率","经营性现金流同比增长率","净利润同比增长率",
               "经营现金流同比/净利润同比","营业收入同比增长率","应收账款同比/营业收入同比","经营现金流入同比增长率","经营现金流入同比/净利润同比",
               "应付款项同比/营业收入同比"]
# Both lengths must match for the name mapping below to be complete
print(len(factor_name))
print(len(factor_dict[d].columns))
quality_name_dict = dict(zip(factor_dict[d].columns, factor_name))
quality_name_dict
# Backtest every factor in both directions (0 = lowest values, 1 = highest values)
for factor_choose in factor_dict[next(iter(factor_dict))].columns:
    # One row per date, one column per stock. Built with concat so the
    # columns are the union of all daily universes; assigning the series
    # column by column kept only the stocks present on the first date.
    factor_df = pd.concat(
        {str(day)[:10]: frame.loc[:,factor_choose] for day,frame in factor_dict.items()},
        axis=1,
    ).T
    for j in [0,1]:
        print('=========因子选择:{}=========='.format(quality_name_dict[factor_choose]))
        backtest(factor_df,period=21,rev=j,fe=0.00,hold_num=50,pct_adj=1,index='000985.XSHG',show=1)
4 分层效果
查看表现较好的几个因子的分层回测效果,可以看出它们的分层收益率曲线都比较单调。
# Layered (grouped) backtest function
def _layer_price_plan(pct_adj, d1, d1_next, d2, d2_next, set_time):
    """Return (start, stop, field, extra get_price kwargs) for pricing modes 1, 2, 3 and 5."""
    if pct_adj == 1:
        # factor on T, trade at the open of T+1, open to open
        return d1_next, d2_next, 'open', {}
    if pct_adj == 2:
        # close of T to close of the next day
        return d1, d2, 'close', {}
    if pct_adj == 3:
        # 1-minute close at set_time on T
        return str(d1)[:10]+set_time, str(d2)[:10]+set_time, 'close', {'frequency': '1m'}
    # pct_adj == 5: 1-minute close at set_time on T+1
    return str(d1_next)[:10]+set_time, str(d2_next)[:10]+set_time, 'close', {'frequency': '1m'}


def _layer_drop_near_limit_up(ranked, when, sort_key):
    """Drop stocks whose close is above 99% of the limit-up price at `when`.

    NOTE(review): this lookup does not pass frequency='1m' although `when`
    carries a time of day - confirm the intended bar size.
    """
    df_limit = get_price(list(ranked.index),end_date=when,count=1,fields=['close','high_limit'])
    df_limit = (df_limit['close']/df_limit['high_limit']).T
    df_limit.columns = ['limit']
    merged = pd.concat([df_limit,ranked],axis=1)
    merged = merged[merged['limit'] <= 0.99]
    return merged.sort_values(sort_key,ascending=True)


def _layer_split(codes, groups):
    """Cut an ordered code index into `groups` equally sized slices.

    The lower half of the groups (including the middle one) is taken from
    the front and the upper half from the back, so any remainder is left
    out around the middle. For groups=5 this reproduces the original
    slicing. Returns None when there are fewer codes than groups.
    """
    total = len(codes)
    size = total//groups
    if size == 0:
        return None
    front = (groups+1)//2
    slices = []
    for i in range(groups):
        if i < front:
            slices.append(codes[i*size:(i+1)*size])
        else:
            slices.append(codes[total-(groups-i)*size:total-(groups-i-1)*size])
    return slices


def backtest_5(factor_df,groups=5,pct_adj=1,index='000905.XSHG',set_time=' 10:30:00',start_date="2011-08-02"):
    """Plot the cumulative return of factor-sorted groups against a benchmark.

    Args:
        factor_df: DataFrame with 'YYYY-MM-DD' rows and stock-code
            columns holding the factor values.
        groups: number of layers (previously ignored and fixed at 5).
        pct_adj: pricing mode. 1: T+1 open to open; 2: T close to close;
            3: set_time on T; 5: set_time on T+1.
        index: benchmark code.
        set_time: time-of-day suffix used by modes 3 and 5.
        start_date: first day of the backtest calendar (defaults to the
            previously hard-coded "2011-08-02").

    Raises:
        ValueError: for an unsupported pct_adj, or when no period could be
            evaluated.
    """
    if pct_adj not in (1,2,3,5):
        raise ValueError('unsupported pct_adj: {!r}'.format(pct_adj))
    print('------------分层效果------------')
    if pct_adj == 3:
        print('调仓时间:T {} —— T+1 {}'.format(set_time,set_time))
    elif pct_adj == 1:
        print('调仓时间:T+1 {} —— T+2 {}'.format('open','open'))
    elif pct_adj == 5:
        print('调仓时间:T+1 {} —— T+2 {}'.format(set_time,set_time))
    backtest_list = get_tradeday_list(start=start_date,end=factor_df.index[-1],count=None)
    labels, group_rows, index_rets = [], [], []
    pool_temp_list = None
    for d1,d2 in zip(backtest_list[:-1],backtest_list[1:]):
        d1_ = ShiftTradingDay(d1,1)  # one trading day later
        d2_ = ShiftTradingDay(d2,1)
        d1 = str(d1)[:10]
        d2 = str(d2)[:10]
        # Refresh the groups on days that have factor values
        if d1 in factor_df.index:
            # Ascending sort: lowest factor values first
            df_temp = factor_df.loc[d1,:].sort_values(ascending=True)
            df_temp = df_temp.dropna()
            # Modes 3 and 5 exclude stocks close to limit-up at trade time
            if pct_adj == 3:
                df_temp = _layer_drop_near_limit_up(df_temp, d1+set_time, d1)
            elif pct_adj == 5:
                df_temp = _layer_drop_near_limit_up(df_temp, str(d1_)[:10]+set_time, d1)
            fresh = _layer_split(df_temp.index, groups)
            if fresh is not None:
                # Too few stocks: keep the previous groups instead
                pool_temp_list = fresh
        if pool_temp_list is None:
            # No groups formed yet
            continue
        start, stop, field, extra = _layer_price_plan(pct_adj, d1, d1_, d2, d2_, set_time)
        # Benchmark return over the same bars
        df_index1 = get_price(index,end_date=start,count=1,fields=[field],**extra)[field]
        df_index2 = get_price(index,end_date=stop,count=1,fields=[field],**extra)[field]
        index_ret = df_index2.values[-1]/df_index1.values[-1]
        # Equal-weighted return of every group
        ret_list = []
        for pool_temp in pool_temp_list:
            df1 = get_price(list(pool_temp),end_date=start,count=1,fields=[field],**extra)[field]
            df1 = df1.dropna(axis=1)  # drop stocks without an entry price
            df2 = get_price(list(df1.columns),end_date=stop,count=1,fields=[field],**extra)[field]
            ret_list.append((df2.values/df1.values).mean())
        labels.append(d1)
        group_rows.append(ret_list)
        index_rets.append(index_ret)
    if not labels:
        raise ValueError('no holding period could be evaluated')
    return_all_df = pd.DataFrame(group_rows,index=labels,
                                 columns=['ret{}'.format(i+1) for i in range(groups)])
    return_all_df['index'] = index_rets
    return_all_df.cumprod().plot(figsize=(15,6))
    plt.show()
# Layered backtest of the factors that performed best above
for factor_choose in ['cost_over_rev','cost_over_rev_ChangeRate','inventory_turnover_rate','total_asset_turnover_rate', 'total_asset_turnover_rate_changeRate',
                      'cash_over_cost','current_over_liability', 'money_over_asset','income_tax_expense_over_rev','income_tax_expense_over_rev_ChangeRate',
                      'accounts_payable.rate','account_receivable.rate', 'net_profit.rate','subtotal_operate_cash_inflow.rate']:
    print('=========因子选择:{}=========='.format(quality_name_dict[factor_choose]))
    # One row per date, one column per stock. Built with concat so the
    # columns are the union of all daily universes; assigning the series
    # column by column kept only the stocks present on the first date.
    factor_df = pd.concat(
        {str(day)[:10]: frame.loc[:,factor_choose] for day,frame in factor_dict.items()},
        axis=1,
    ).T
    backtest_5(factor_df,groups=5,pct_adj=1,index='000985.XSHG',set_time=' 10:30:00')