关于聪明钱,去年3月份左右的时候研究过,不过就是简单的检测因子,有明显分层。于是在此基础上做了个策略。
再来看看因子逻辑:
'''
聪明钱的构建思路:
1、先计算指标 $S_t$:$S_t = |R_t| / \sqrt{V_t}$,其中 $R_t$ 为第 t 分钟的涨跌幅,$V_t$ 为第 t 分钟的成交量。指标 $S_t$
的值越大,则表示该分钟的交易越“聪明”。
2、将所取时间段内的每分钟数据按指标 $S_t$ 从大到小排序,取成交量累积占比前20%的分钟视为聪明钱交易。报告中取10个交易日的数据,
即 10×240 = 2400 分钟的数据。
3、构造聪明钱的情绪因子 Q:$Q = \mathrm{VWAP}_{smart} / \mathrm{VWAP}_{all}$;其中,$\mathrm{VWAP}_{smart}$ 是聪明钱的成交量加权平均价,
$\mathrm{VWAP}_{all}$ 是所有交易的成交量加权平均价。
'''
没有深入研究,尝试了机器学习,就出了一个策略。
结果样本外基本上就没啥动静,长期处于回撤期间。。。
本次就再返工研究这个因子是啥情况。。。打算放弃这个因子,如有错误,欢迎指正交流
import datetime
from itertools import groupby

import numpy as np
import pandas as pd
from pandas import DataFrame,Series
def _smart_money_q(price, threshold):
    """Compute the smart-money factor Q from a frame of price bars.

    Every bar gets a score ``s = |(close - open) / open| / sqrt(volume)``
    (0 for bars with zero volume).  Bars are ranked by ``s`` in descending
    order and the leading bars whose cumulative volume stays within
    ``threshold`` of the total volume form the "smart" set.  Q is the
    money/volume ratio (VWAP) of the smart set divided by the VWAP of all
    bars.

    params:
        price: DataFrame with 'open', 'close', 'volume' and 'money' columns
        threshold: share of the total volume attributed to smart money
    return:
        Q as a float, or NaN when there is no traded volume to work with
    """
    if len(price) == 0:
        return np.nan
    total_volume = price['volume'].sum()
    # `not x > 0` also rejects a NaN total, not only a zero one.
    if not total_volume > 0:
        return np.nan
    total_money = price['money'].sum()

    traded = price['volume'] != 0
    bar_move = ((price['close'] - price['open']) / price['open']).abs()
    index_s = (bar_move / np.sqrt(price['volume'])).where(traded, 0)

    ranked = price.assign(index_s=index_s).sort_values(by='index_s', ascending=False)
    cum_volume = ranked['volume'].cumsum()
    smart = ranked[cum_volume <= threshold * total_volume]
    smart_volume = smart['volume'].sum()
    smart_money = smart['money'].sum()
    if smart_volume == 0:
        # The best-ranked traded bar alone exceeds the volume budget, so the
        # smart set carries no volume.  Fall back to that single bar, taking
        # BOTH its volume and its money; patching only the volume would give
        # a VWAP of 0 and therefore Q == 0.
        top_bar = ranked[ranked['volume'] > 0].iloc[0]
        smart_volume = top_bar['volume']
        smart_money = top_bar['money']

    vwap_all = total_money / total_volume
    vwap_smart = smart_money / smart_volume
    # NaN never compares equal to anything (itself included), so it has to be
    # detected with isnan() rather than `== np.nan`.
    if np.isnan(vwap_all) or np.isnan(vwap_smart) or vwap_all == 0:
        return np.nan
    return vwap_smart / vwap_all


def calculate_q_factor(stock, frequency, count, threshold):
    """Smart-money Q factor for use inside a backtest.

    Bars are fetched with ``attribute_history``.  Note that the definition
    with the same name further down replaces this one when both are executed
    in the same namespace.

    params:
        stock: security code
        frequency: bar frequency passed to ``attribute_history``
        count: number of bars to fetch
        threshold: share of the total volume attributed to smart money
    return:
        q_factor: the Q factor (NaN when it cannot be computed)
    """
    price = attribute_history(stock, count, frequency, ['open', 'close', 'volume', 'money'], df=True)
    return _smart_money_q(price, threshold)


def calculate_q_factor(stock, frequency, count, threshold, end_date, num_try=0):
    """Smart-money Q factor for use in the research environment.

    Bars are fetched with ``get_price`` up to ``end_date``.  Any exception
    raised while fetching or computing triggers another attempt; at most five
    attempts are made in total, counting ``num_try`` as attempts already
    spent (at least one attempt is always made).

    params:
        stock: security code
        frequency: bar frequency passed to ``get_price``
        count: number of bars to fetch
        threshold: share of the total volume attributed to smart money
        end_date: last date of the window
        num_try: number of attempts already used up
    return:
        q_factor: the Q factor, or NaN when it cannot be computed or every
            attempt failed
    """
    attempts_left = max(1, 5 - num_try)
    for _ in range(attempts_left):
        try:
            price = get_price(stock, fields=['open', 'close', 'volume', 'money'],
                              end_date=end_date, count=count, frequency=frequency)
            return _smart_money_q(price, threshold)
        except Exception:
            # Best effort by design: one failing security must not abort a
            # scan over the whole universe.
            continue
    return np.nan
import matplotlib.pyplot as plt
import pandas as pd

# Number of bars in a 240-minute trading day for every odd bar length from
# 1 to 21 minutes (floor division, so partial bars are not counted).
timeDict = {str(minutes): 240 // minutes for minutes in range(1, 22, 2)}
# Length of the look-back window, in trading days.
N = 20

# Evaluate the factor once so every call below uses the same end date.
scan_end_date = datetime.datetime.now().date()

# Outer key: smart-money threshold (0.10 .. 0.45 in steps of 0.05);
# inner key: bar length in minutes (as a string); value: Q factor.
dfs = {}
for step in range(2, 10):
    threshold = step / 20
    dfs[threshold] = {
        key: calculate_q_factor('000001.XSHE', '{}m'.format(key), int(N * value),
                                threshold, scan_end_date)
        for key, value in timeDict.items()
    }

dfs = pd.DataFrame(dfs)
# Turn the string bar lengths into integers so the rows sort numerically.
dfs.index = [int(x) for x in dfs.index.values]
dfs.sort_index(ascending=True, inplace=True)
dfs.plot(figsize=(20, 10))
<matplotlib.axes._subplots.AxesSubplot at 0x7fa47882bc88>
<Figure size 576x8640 with 0 Axes>
def save_file(filename, data, mode=True):
    """Pickle ``data`` to ``filename`` or load a pickled object back from it.

    params:
        filename: path of the pickle file
        data: object to store; ignored when loading
        mode: truthy -> write ``data`` to the file, falsy -> read the file
    return:
        ``data`` itself after writing, or the object read from the file
    """
    import pickle

    if mode:
        with open(filename, 'wb') as f:
            pickle.dump(data, f)
        return data

    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that this notebook wrote itself.
    with open(filename, 'rb') as f:
        return pickle.load(f)
# First and last trading day of every month covered by a list of trading days.
def calculate_FL(time_list):
    """Return the first and last day of each calendar month in ``time_list``.

    Consecutive entries sharing the same (year, month) form one month.  The
    input is expected to be in chronological order and to hold objects that
    expose ``year``, ``month`` and ``strftime`` (e.g. ``datetime.date``).

    params:
        time_list: chronologically ordered trading days
    return:
        (trade_start, trade_end): two lists of 'YYYY-MM-DD' strings with one
        entry per month; both are empty for an empty input
    """
    trade_start = []
    trade_end = []
    # Grouping on (year, month) detects every month change, including jumps
    # of more than one month, which a "month index increased by exactly 1"
    # test would skip.
    for _, month_days in groupby(time_list, key=lambda day: (day.year, day.month)):
        month_days = list(month_days)
        trade_start.append(month_days[0].strftime('%Y-%m-%d'))
        trade_end.append(month_days[-1].strftime('%Y-%m-%d'))
    return trade_start, trade_end
def delect_stop(stocks, beginDate, n=30 * 2):
    """Keep the securities whose start date is more than ``n`` days old.

    A security is kept when the ``start_date`` reported by
    ``get_security_info`` (presumably the listing date -- confirm against the
    platform docs) is strictly earlier than ``beginDate`` minus ``n`` calendar
    days.  No suspension or delisting check is performed here.

    params:
        stocks: iterable of security codes
        beginDate: reference date as a 'YYYY-MM-DD' string
        n: minimum age in calendar days
    return:
        list of the securities that pass the filter, in input order
    """
    # The cutoff does not depend on the security, so compute it once.
    cutoff = (datetime.datetime.strptime(beginDate, "%Y-%m-%d")
              - datetime.timedelta(days=n)).date()
    return [stock for stock in stocks
            if get_security_info(stock).start_date < cutoff]
# Build the stock universe for a given date.
def get_stock(stockPool, begin_date):
    """Return the filtered constituents of a named stock pool.

    The constituents of the index (or indexes) behind ``stockPool`` are
    fetched as of ``begin_date``, securities flagged as ST on that date are
    dropped, and the remainder is passed through ``delect_stop``.

    params:
        stockPool: one of 'HS300', 'ZZ500', 'ZZ800', 'CYBZ', 'ZXBZ', 'A'
        begin_date: reference date as a 'YYYY-MM-DD' string
    return:
        list of security codes
    raises:
        ValueError: if ``stockPool`` is not one of the supported names
    """
    # Pool name -> index code(s) whose constituents make up the pool.
    index_codes = {
        'HS300': ('000300.XSHG',),
        'ZZ500': ('399905.XSHE',),
        'ZZ800': ('399906.XSHE',),
        'CYBZ': ('399006.XSHE',),
        'ZXBZ': ('399005.XSHE',),
        'A': ('000002.XSHG', '399107.XSHE'),
    }
    if stockPool not in index_codes:
        # Without this check an unknown name only failed later with an
        # unrelated "variable referenced before assignment" error.
        raise ValueError('unknown stock pool {!r}; expected one of {}'.format(
            stockPool, sorted(index_codes)))

    stockList = []
    for code in index_codes[stockPool]:
        stockList.extend(get_index_stocks(code, begin_date))

    # Drop ST securities.  With count=1 the frame is read as holding a single
    # row, so take it positionally (assumes get_extras returns a DataFrame
    # with one column per security -- confirm against the platform docs).
    st_data = get_extras('is_st', stockList, count=1, end_date=begin_date)
    stockList = [stock for stock in stockList if not st_data[stock].iloc[0]]

    # Drop securities that started trading too recently.
    return delect_stop(stockList, begin_date)
# Factor values per rebalance date: key = date string, value = dict mapping
# every security in that date's universe to its Q factor.
q_dicts = {}
frequency = '1m'
count = 2400
threshold = 0.1
# Walk through the last trading day of every month, one calendar year at a time.
from jqdata import *
year_list = ['2010','2011','2012','2013','2014','2015','2016','2017']
for i in year_list:
    year_start = str(int(i)-1)
    tradeTimeList = get_trade_days(start_date='{}-12-31'.format(year_start), end_date='{}-12-31'.format(i), count=None)
    FL = calculate_FL(tradeTimeList)
    monthFisrtDay, monthLastDay = FL
    print(monthLastDay)
    for tradeT in monthLastDay:
        # Each window starts on 31 December of the previous year, so that
        # month-end can show up again here; do not recompute a whole
        # universe that is already stored.
        if tradeT in q_dicts:
            continue
        stockPool = get_stock('A', tradeT)  # filtered universe for this date
        q_dicts[tradeT] = {
            stk: calculate_q_factor(stk, frequency, count, threshold, tradeT)
            for stk in stockPool
        }
    # Persist everything computed so far; each yearly file is cumulative.
    df_qdict = pd.DataFrame(q_dicts)
    df_qdict.to_csv('all_q_dicts_{}.csv'.format(i))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-a7a5387dee07> in <module>() 5 year_start = str(int(i)-1) 6 tradeTimeList = get_trade_days(start_date='{}-12-31'.format(year_start), end_date='{}-12-31'.format(i), count=None) ----> 7 FL = calculate_FL(tradeTimeList) 8 monthFisrtDay = FL[0] 9 monthLastDay = FL[1] NameError: name 'calculate_FL' is not defined
# Extend the factor table with the month-ends up to 2018-11-30.
tradeTimeList = get_trade_days(start_date='{}-12-31'.format(2017), end_date='{}-11-30'.format(2018), count=None)
FL = calculate_FL(tradeTimeList)
monthFisrtDay, monthLastDay = FL
for tradeT in monthLastDay:
    # Skip dates that an earlier cell already filled in.
    if tradeT in q_dicts:
        continue
    stockPool = get_stock('A', tradeT)  # filtered universe for this date
    q_dicts[tradeT] = {
        stk: calculate_q_factor(stk, frequency, count, threshold, tradeT)
        for stk in stockPool
    }
# Persist the accumulated factor values.
df_qdict = pd.DataFrame(q_dicts)
df_qdict.to_csv('all_q_dicts_{}.csv'.format(2018))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-2-d9927fa5edaf> in <module>() 1 tradeTimeList = get_trade_days(start_date='{}-12-31'.format(2017), end_date='{}-11-30'.format(2018), count=None) ----> 2 FL = calculate_FL(tradeTimeList) 3 monthFisrtDay = FL[0] 4 monthLastDay = FL[1] 5 for j in range(len(monthLastDay)): NameError: name 'calculate_FL' is not defined
import pandas as pd
# Load the yearly factor files; the first CSV column (the security code)
# becomes the index of each frame.
(
    df_dict_2010, df_dict_2011, df_dict_2012,
    df_dict_2013, df_dict_2014, df_dict_2015,
    df_dict_2016, df_dict_2017, df_dict_2018,
) = [
    pd.read_csv('all_q_dicts_{}.csv'.format(year), index_col=0)
    for year in range(2010, 2019)
]
df_dict_2018.head()
2009-12-31 | 2010-01-29 | 2010-02-26 | 2010-03-31 | 2010-04-30 | 2010-05-31 | 2010-06-30 | 2010-07-30 | 2010-08-31 | 2010-09-30 | ... | 2018-02-28 | 2018-03-30 | 2018-04-27 | 2018-05-31 | 2018-06-29 | 2018-07-31 | 2018-08-31 | 2018-09-28 | 2018-10-31 | 2018-11-30 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
000001.XSHE | 1.006798 | 0.998273 | 0.998496 | 0.993877 | 0.993433 | 0.996267 | 1.001254 | NaN | NaN | 0.993616 | ... | 1.003686 | 0.984279 | 0.993165 | 1.001389 | 0.993320 | 0.994469 | 0.988993 | 0.993574 | 0.996810 | 0.994382 |
000002.XSHE | 0.997598 | 0.980102 | 0.997362 | 0.998059 | 0.989203 | 0.983346 | 0.996349 | 0.991116 | 0.994350 | 0.999582 | ... | 0.997788 | 0.989308 | 1.004136 | 0.996622 | 0.987748 | 0.994332 | 0.987894 | 0.996752 | 1.010091 | 0.996233 |
000004.XSHE | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 0.999956 | 0.991227 | 0.993812 | 0.998745 | 1.003219 | 1.001096 | 1.005379 | 0.989531 | 0.968292 | 0.999837 |
000005.XSHE | 0.989354 | 0.958115 | 0.992183 | 0.992988 | 0.994881 | 0.990632 | 0.988380 | 1.000019 | 0.997607 | 0.996333 | ... | 0.996079 | 1.002573 | 1.000663 | 0.998847 | 1.007336 | 0.999687 | 0.994861 | 0.999278 | 0.980513 | 0.986307 |
000006.XSHE | 0.995111 | 0.969805 | 0.999073 | 1.003019 | 0.989185 | 0.988331 | 0.995760 | 0.992118 | 0.993605 | 0.993541 | ... | NaN | 0.974044 | 1.003488 | 0.988209 | 0.986300 | 0.993393 | 0.996315 | 0.989064 | 0.986726 | 0.998673 |
5 rows × 108 columns
# Column labels of the 2018 factor table as an array; displayed as the cell result.
dateList = df_dict_2018.columns.values
dateList
array(['2009-12-31', '2010-01-29', '2010-02-26', '2010-03-31', '2010-04-30', '2010-05-31', '2010-06-30', '2010-07-30', '2010-08-31', '2010-09-30', '2010-10-29', '2010-11-30', '2010-12-31', '2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29', '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31', '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30', '2012-01-31', '2012-02-29', '2012-03-30', '2012-04-27', '2012-05-31', '2012-06-29', '2012-07-31', '2012-08-31', '2012-09-28', '2012-10-31', '2012-11-30', '2012-12-31', '2013-01-31', '2013-02-28', '2013-03-29', '2013-04-26', '2013-05-31', '2013-06-28', '2013-07-31', '2013-08-30', '2013-09-30', '2013-10-31', '2013-11-29', '2013-12-31', '2014-01-30', '2014-02-28', '2014-03-31', '2014-04-30', '2014-05-30', '2014-06-30', '2014-07-31', '2014-08-29', '2014-09-30', '2014-10-31', '2014-11-28', '2014-12-31', '2015-01-30', '2015-02-27', '2015-03-31', '2015-04-30', '2015-05-29', '2015-06-30', '2015-07-31', '2015-08-31', '2015-09-30', '2015-10-30', '2015-11-30', '2015-12-31', '2016-01-29', '2016-02-29', '2016-03-31', '2016-04-29', '2016-05-31', '2016-06-30', '2016-07-29', '2016-08-31', '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-30', '2017-01-26', '2017-02-28', '2017-03-31', '2017-04-28', '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31', '2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29', '2018-01-31', '2018-02-28', '2018-03-30', '2018-04-27', '2018-05-31', '2018-06-29', '2018-07-31', '2018-08-31', '2018-09-28', '2018-10-31', '2018-11-30'], dtype=object)
import calendar
def calculate_next_month(day):
    """Return the first and last calendar day of the month following ``day``.

    params:
        day: object exposing ``year`` and ``month`` (e.g. ``datetime.date``)
    return:
        (next_month_start, next_month_end): two ``datetime.datetime`` values
        at midnight
    """
    # divmod rolls December over into January of the following year.
    year_carry, month_index = divmod(day.month, 12)
    next_year = day.year + year_carry
    next_month_number = month_index + 1
    # Build the date directly instead of formatting and re-parsing a string.
    next_month_start = datetime.datetime(next_year, next_month_number, 1)
    days = calendar.monthrange(next_year, next_month_number)[1]
    next_month_end = next_month_start + datetime.timedelta(days=days - 1)
    return next_month_start, next_month_end
def calculate_class_rts(stocklist, start_date, end_date):
    """Equal-weighted mean return of a group of securities over a period.

    For every security the return is ``last close / first close - 1`` over
    the daily bars between ``start_date`` and ``end_date``.  Securities with
    no bars, or whose return is NaN, are left out of the average.

    params:
        stocklist: iterable of security codes
        start_date: first day of the holding period
        end_date: last day of the holding period
    return:
        mean return of the group, or NaN when no security has a usable return
    """
    rts_list = []
    for stk in stocklist:
        # NOTE(review): 'open' is fetched but never used, and the return is
        # measured from the first bar's close, so the first day's own move is
        # not included -- confirm this is intended.
        price = get_price(stk, start_date=start_date, end_date=end_date, frequency='daily', fields=['close','open'])
        close = price['close']
        if len(close) == 0:
            # No bars in the period; iloc[0] would raise IndexError.
            continue
        rts_list.append(close.iloc[-1] / close.iloc[0] - 1)
    returns = pd.Series(rts_list, dtype=float).dropna()
    if returns.empty:
        # Avoid dividing by a zero count for an empty group.
        return np.nan
    return returns.mean()
# Mean next-month return of each factor quintile, keyed by formation date.
# A holds the lowest fifth of Q values, E the highest.
classA = {}
classB = {}
classC = {}
classD = {}
classE = {}
class_buckets = [classA, classB, classC, classD, classE]
# The last date is left out of the loop (presumably because the month after
# it is not covered by the data -- confirm).
for date in dateList[:-1]:
    date_datetime = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    # Holding period: the calendar month after the formation date.
    next_month_start, next_month_end = calculate_next_month(date_datetime)
    # Rank securities by factor value.  The chain builds a new Series, so the
    # column inside df_dict_2018 is never modified in place.
    q_factors = df_dict_2018[date].dropna().sort_values(ascending=True)
    stockListLast = list(q_factors.index.values)
    lens = len(stockListLast)
    # Quintile boundaries; the last group absorbs the rounding remainder.
    cuts = [0, int(0.2*lens), int(0.4*lens), int(0.6*lens), int(0.8*lens), lens]
    for bucket, lower, upper in zip(class_buckets, cuts[:-1], cuts[1:]):
        bucket[date] = calculate_class_rts(stockListLast[lower:upper], next_month_start, next_month_end)
df = pd.DataFrame({'A':classA,
                   'B':classB,
                   'C':classC,
                   'D':classD,
                   'E':classE})
df.head()
A | B | C | D | E | |
---|---|---|---|---|---|
2009-12-31 | -0.026634 | -0.027453 | -0.032847 | -0.038114 | -0.052188 |
2010-01-29 | 0.085057 | 0.088479 | 0.082246 | 0.076639 | 0.064585 |
2010-02-26 | 0.031484 | 0.038035 | 0.029599 | 0.022562 | 0.011440 |
2010-03-31 | -0.102429 | -0.093342 | -0.084084 | -0.069611 | -0.082053 |
2010-04-30 | -0.086284 | -0.082056 | -0.084876 | -0.076366 | -0.055908 |
# Compound the monthly group returns into cumulative growth curves and plot them.
df.add(1).cumprod().plot(figsize=(20, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fce97108d30>
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程