
Exploring the Volatility Factor in the A-Share Market

Posted by 外汇交易达人 on June 12 at 20:00

Introduction

Research Objective:

This article draws on the fourth factor-research report from Minsheng Securities, 《低波动异象:解析、改进及成因实证》 (The Low-Volatility Anomaly: Analysis, Improvement and Empirical Evidence on Its Causes), to explore the volatility factor. In quantitative investing, volatility is one of the most widely used stock-selection factors. Markets around the world exhibit a low-volatility anomaly to some degree: over the long run, low-volatility stocks earn higher returns, with lower volatility, than high-volatility stocks, and the anomaly is stronger in developed markets than in emerging ones. This article tests the volatility factor over several lookback windows, adjusts it, and examines how it performs in China's A-share market.

The Volatility Anomaly:

Classical asset pricing theory (for example the CAPM) holds that an asset's expected return is positively and linearly related to its beta, i.e. its systematic risk: high-risk assets should earn higher expected returns and low-risk assets lower ones. Contrary to this, a large body of empirical research documents that, over the long run, low-volatility (low-risk) stocks deliver higher returns, with lower volatility, than high-volatility (high-risk) stocks. This is the low-volatility anomaly.
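
For reference, the CAPM relation mentioned above is the standard linear pricing equation (written here in generic textbook notation, not taken from the report):

    E[R_i] = R_f + \beta_i (E[R_m] - R_f),    \beta_i = \mathrm{Cov}(R_i, R_m) / \mathrm{Var}(R_m)

so that, as long as the market risk premium is positive, higher systematic risk should be rewarded with higher expected return; the low-volatility anomaly is the empirical failure of this pattern when stocks are sorted on realized volatility.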

Research Approach:

(1) Define the stock universe and sample period, and record both the raw factor values and the factor values prepared for the adjustment regressions (the volatility measure itself, DASTD, is sketched after this list).

(2) Build the market-cap and industry adjustment model for volatility and estimate it with WLS regressions.

(3) Compute the factor statistics over the sample period: IC, t-values and standard deviations.

(4) Backtest returns: form groups by factor value and plot the net-value curves of the long-short portfolios.
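
As a sketch of the volatility measure recorded in step (1) (my notation; the window lengths and the use of average-price returns follow the volatilty_cal function below), DASTD over a lookback window is the sample standard deviation of the daily returns in that window:

    \mathrm{DASTD} = \sqrt{ \frac{1}{N-1} \sum_{t=1}^{N} (r_t - \bar{r})^2 },    window length in {20, 60, 120, 250} trading days

where r_t is the daily return computed from the daily average price, zero-return days (typically suspensions) are dropped, and the four windows give the DASTD_1m, DASTD_3m, DASTD_6m and DASTD_1y columns used throughout.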

Research Conclusions:

(1) Volatility is negatively related to excess stock returns; the volatility anomaly exists in China's A-share market.

(2) Both the market-cap and the industry adjustments raise the model's IC, and the adjusted factors beat the raw volatility factor on both IC and returns.

from jqdata import *
from jqfactor import standardlize
import datetime
import numpy as np
import pandas as pd
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')
# Get the first trading day of each period (day/month/quarter/half-year)
def get_tradeday_list(start,end,frequency=None,count=None):
    if count != None:
        df = get_price('000001.XSHG',end_date=end,count=count)
    else:
        df = get_price('000001.XSHG',start_date=start,end_date=end)
    if frequency == None or frequency =='day':
        return df.index
    else:
        df['year-month'] = [str(i)[0:7] for i in df.index]
        if frequency == 'month':
            return df.drop_duplicates('year-month').index
        elif frequency == 'quarter':
            df['month'] = [str(i)[5:7] for i in df.index]
            df = df[(df['month']=='01') | (df['month']=='04') | (df['month']=='07') | (df['month']=='10') ]
            return df.drop_duplicates('year-month').index
        elif frequency =='halfyear':
            df['month'] = [str(i)[5:7] for i in df.index]
            df = df[(df['month']=='01') | (df['month']=='06')]
            return df.drop_duplicates('year-month').index 
# Get the last trading day of each month
def get_last_day(start, end):
    trade_list = get_tradeday_list(start,end,frequency='month')
    trade_days_list = list(get_all_trade_days())
    last_day_list = []
    for item in trade_list:
        item = str(item)
        y = int(item[0:4])
        m = int(item[5:7])
        d = int(item[8:10])
        date = datetime.date(y,m,d)
        location = trade_days_list.index(date)
        last_day_list.append(trade_days_list[location - 1])
    last_day_list = last_day_list[1:]
    return last_day_list
# Remove stocks listed for less than 3 months and ST stocks
def filter_stock(stockList,date,days=21*3):

    # Drop stocks listed less than 3 months before beginDate
    def delect_stop(stocks,beginDate,n=days):
        stockList=[]
        beginDate = beginDate
        for stock in stocks:
            start_date=get_security_info(stock).start_date
            if start_date<(beginDate-datetime.timedelta(days=n)):
                stockList.append(stock)
        return stockList
    
    # Remove ST stocks
    st_data=get_extras('is_st',stockList, count = 1,end_date=date)
    stockList = [stock for stock in stockList if not st_data[stock][0]]
    
    # Remove suspended, newly listed and delisted stocks
    stockList=delect_stop(stockList,date)

    return stockList
# Get the SW level-1 industry code a stock belongs to
def get_industry_code_from_security(security,date=None):
    industry_index=get_industries(name='sw_l1').index
    for i in range(0,len(industry_index)):
        try:
            index = get_industry_stocks(industry_index[i],date=date).index(security)
            return industry_index[i]
        except:
            continue
    return 'not_found'
# Compute the volatility factor (DASTD: std of daily returns over 20/60/120/250-day windows)
def volatilty_cal(stock_index, date):
    stock_list = get_index_stocks(stock_index, date = date)
    stock_list = filter_stock(stock_list, date)
    stock_df = pd.DataFrame()
    for stock in stock_list:
        dastd = []
        for days in [20, 60, 120, 250]:
            price = get_price(stock, count = days, end_date = date, fields = ['avg'])
            price_pct = price.pct_change()
            price_pct = price_pct.dropna()
            price_pct = price_pct[price_pct['avg'] != 0]
            price_pct = price_pct.std()['avg']
            dastd.append(price_pct)
        stock_df[stock] = dastd
    stock_df = stock_df.T
    stock_df.columns = ['DASTD_1m', 'DASTD_3m', 'DASTD_6m', 'DASTD_1y']
    return stock_df
# Add the float market-cap factor
def value_cal(stock_list, date):
    value_df = pd.DataFrame()
    for stock in stock_list:
        q = query(valuation).filter(valuation.code == stock)
        df = get_fundamentals(q, date = date, statDate = None)
        # Guard against stocks with no valuation record on this date
        if df.empty:
            value_df[stock] = [np.nan]
            continue
        circulating_market_cap = df['circulating_market_cap'].iloc[0]
        value_df[stock] = [circulating_market_cap]
    value_df = value_df.T
    value_df.columns = ['value']
    return value_df
# Compute each stock's excess return over the next rebalancing period (benchmark: 000985.XSHG)
def get_excess_return(stock_list, date, date_list):
    return_df = pd.DataFrame()
    date_loc = date_list.index(date)
    after_date_loc = date_loc + 1
    after_date = date_list[after_date_loc]
    benchmark = get_price('000985.XSHG', end_date=date, count = 1, fields = 'close').loc[date, 'close']
    after_benchmark = get_price('000985.XSHG', end_date=after_date, count = 1, fields = 'close').loc[after_date, 'close']
    benchmark_return = after_benchmark / benchmark
    for stock in stock_list:
        stock_price = get_price(stock, end_date=date, count = 1, fields = 'close').loc[date, 'close']
        after_stock_price = get_price(stock, end_date=after_date, count = 1, fields = 'close').loc[after_date, 'close']
        stock_return = after_stock_price / stock_price
        return_df[stock] = [stock_return - benchmark_return]
    return_df = return_df.T
    return_df.columns = ['excess_return']
    return return_df
# Add SW level-1 industry dummy variables
def industry_cal(stock_list):
    industry_df = get_industries(name='sw_l1')
    for stock in stock_list:
        industry = get_industry_code_from_security(stock)
        industry_df[stock] = 0
        industry_df.loc[industry, stock] = 1
    industry_df = industry_df.drop(['name', 'start_date'], axis = 1)
    industry_df = industry_df.T
    industry_df = industry_df.drop(['not_found'], axis = 1, errors = 'ignore')
    return industry_df
# Concatenate all factor DataFrames and drop rows with missing values
def factor_combine(factor_list):
    factor_df = factor_list[0]
    for item in factor_list[1:]:
        factor_df = pd.concat([factor_df, item], axis = 1)
    factor_df = factor_df.dropna()
    return factor_df
# Winsorize: clip each column at its lower/upper quantiles
def tail_adjust(data_df, rate):
    for column in data_df.columns:
        quantile_high = data_df[column].quantile(1-rate)
        quantile_low = data_df[column].quantile(rate)
        data_df[column] = data_df[column].clip(lower=quantile_low, upper=quantile_high)
    return data_df
# Z-score standardization, iteratively clipping values beyond +/-3 standard deviations
def data_standardlize(data_df):
    data_df = standardlize(data_df, axis = 0)
    for column in data_df.columns:
        while data_df[column].max() > 3 or data_df[column].min() < -3:
            data_df.loc[data_df[column] > 3, column] = 3
            data_df.loc[data_df[column] < -3, column] = -3
            data_df[column] = standardlize(data_df[column], axis = 0)
    return data_df
# Assemble the primary (unadjusted) factor table
def get_primary_factors(date, date_list, stock_index):
    volatilty = volatilty_cal(stock_index, date)
    stock_list = list(volatilty.index)
    value = value_cal(stock_list, date)
    industry = industry_cal(stock_list)
    excess_return = get_excess_return(stock_list, date, date_list)
    factors_df = factor_combine([excess_return, volatilty, value, industry])
    return factors_df
# Assemble the factor table used for the WLS regressions
def get_regression_factors(date, date_list, stock_index):
    volatilty = volatilty_cal(stock_index, date)
    stock_list = list(volatilty.index)
    value = value_cal(stock_list, date)
    weight = np.sqrt(value)
    weight.columns = ['weight']
    industry = industry_cal(stock_list)
    excess_return = get_excess_return(stock_list, date, date_list)
    volatilty = tail_adjust(volatilty, 0.01)
    value = tail_adjust(value, 0.05)
    volatilty = data_standardlize(volatilty)
    value = data_standardlize(value)
    factors_df = factor_combine([excess_return, weight, volatilty, value, industry])
    return factors_df
# Get the list of rebalancing dates (last trading day of each month)
date_list = get_last_day(datetime.date(2011,8,2), datetime.date(2019,6,2))
date_list
[datetime.date(2011, 8, 31),
 datetime.date(2011, 9, 30),
 datetime.date(2011, 10, 31),
 ...
 datetime.date(2019, 3, 29),
 datetime.date(2019, 4, 30)]
# Record the regression factor table for each rebalancing date in a dict
regression_factor_dict = {}
for date in date_list[:-1]:
    print('Computing data for {} ...'.format(str(date)[:7]))
    regression_factor_df = get_regression_factors(date, date_list, '000985.XSHG')
    regression_factor_dict[date] = regression_factor_df
Computing data for 2011-08 ...
Computing data for 2011-09 ...
...
Computing data for 2018-12 ...
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-16-3e6fed98477c> in <module>
----> 5     regression_factor_df = get_regression_factors(date, date_list, '000985.XSHG')

<ipython-input-14-99d4030bde33> in get_regression_factors(date, date_list, stock_index)
----> 5     value = value_cal(stock_list, date)

<ipython-input-7-cc44bf17741d> in value_cal(stock_list, date)
----> 7         circulating_market_cap = df.loc[0,'circulating_market_cap']

KeyError: 'the label [0] is not in the [index]'
# Save the computed regression factor tables
# Persist the dict to a file with the pickle module
import pickle
pkl_file = open('volatilty_regression_factor.pkl', 'wb')
pickle.dump(regression_factor_dict, pkl_file, 0)
pkl_file.close()
# Load the saved regression factor tables
import pickle
pkl_file = open('volatilty_regression_factor.pkl', 'rb')
regression_factor_dict = pickle.load(pkl_file)
pkl_file.close()
# Run WLS per period and average the factor coefficient, t-value and adjusted R-squared
def wls_results(factor_dict, factor):
    key_list = list(factor_dict.keys())
    param_list = []
    t_list = []
    r_list = []
    for item in key_list:
        factor_df = factor_dict[item]
        y = factor_df['excess_return']
        weight = factor_df['weight']
        X = pd.DataFrame()
        for x in factor:
            X[x] = factor_df[x]
        mod_wls = sm.WLS(y, X, weights=weight)
        res_wls = mod_wls.fit()
        params = res_wls.params[0]
        t_stat = res_wls.tvalues[0]
        r_squared = res_wls.rsquared_adj
        param_list.append(params)
        t_list.append(t_stat)
        r_list.append(r_squared)
    result_list = [np.mean(param_list), np.mean(t_list), np.mean(r_list)]
    return result_list
# Aggregate the WLS statistics across the four regression specifications
def wls_combine(factor_dict, factor_sort):
    basic_list = ['DASTD_1m', 'DASTD_3m', 'DASTD_6m', 'DASTD_1y']
    value_list = ['value']
    industry_list = list(get_industries(name='sw_l1').index)
    stat_df = pd.DataFrame()
    if factor_sort == 'basic':
        for item in basic_list:
            stat_df[item] = wls_results(factor_dict, [item])
    elif factor_sort == 'value':
        for item in basic_list:
            stat_df[item] = wls_results(factor_dict, [item, 'value'])
    elif factor_sort == 'industry':
        for item in basic_list:
            X = [item]
            for industry in industry_list:
                X.append(industry)
            stat_df[item] = wls_results(factor_dict, X)
    elif factor_sort == 'value_industry':
        for item in basic_list:
            X = [item, 'value']
            for industry in industry_list:
                X.append(industry)
            stat_df[item] = wls_results(factor_dict, X)
    stat_df = stat_df.T
    stat_df.columns = ['params', 't_stats', 'R_squared_adj']
    return stat_df

WLS Regression

This article runs four kinds of market-cap-weighted monthly cross-sectional regressions: the factor alone, the factor plus industry dummies, the factor plus market cap, and the factor plus both industry dummies and market cap. All of them show a negative relation between volatility and excess returns. Before each regression the factors are winsorized and z-score standardized; see the referenced report for details.
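
As a sketch of the fullest specification estimated below (my notation; the variable construction follows get_regression_factors and wls_results), the month-t cross-sectional regression, run without an intercept, is

    r_{i,t+1} - r_{m,t+1} = \beta_t \mathrm{DASTD}_{i,t} + \gamma_t \mathrm{Size}_{i,t} + \sum_k \delta_{k,t} I_{i,k} + \varepsilon_{i,t}

estimated by WLS with weights w_i = \sqrt{\text{float market cap}_i}, where the excess return is measured against 000985.XSHG, Size is the winsorized and standardized float market cap, and I_{i,k} are SW level-1 industry dummies. The tables report the time-series averages of \beta_t, its t-value and the adjusted R^2; the simpler specifications drop the Size term, the industry terms, or both.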

# Base regression statistics
basic_stats = wls_combine(regression_factor_dict, 'basic')
basic_stats
params t_stats R_squared_adj
DASTD_1m -0.003142 -2.035879 0.022947
DASTD_3m -0.003641 -2.190931 0.027116
DASTD_6m -0.003720 -2.191008 0.029064
DASTD_1y -0.003480 -2.192558 0.030857
# Statistics with the market-cap adjustment
value_stats = wls_combine(regression_factor_dict, 'value')
value_stats
params t_stats R_squared_adj
DASTD_1m -0.003422 -2.029731 0.037098
DASTD_3m -0.004139 -2.284412 0.041502
DASTD_6m -0.004188 -2.286432 0.042552
DASTD_1y -0.004409 -2.356935 0.043096
# Statistics with the industry adjustment
industry_stats = wls_combine(regression_factor_dict, 'industry')
industry_stats
params t_stats R_squared_adj
DASTD_1m -0.003384 -1.886462 0.151790
DASTD_3m -0.003354 -1.905577 0.154731
DASTD_6m -0.003380 -1.989301 0.157306
DASTD_1y -0.003775 -2.152903 0.158355
# Statistics with both market-cap and industry adjustments
value_industry_stats = wls_combine(regression_factor_dict, 'value_industry')
value_industry_stats
params t_stats R_squared_adj
DASTD_1m -0.004021 -2.133478 0.175919
DASTD_3m -0.004138 -2.175246 0.177759
DASTD_6m -0.004143 -2.216191 0.178465
DASTD_1y -0.004622 -2.326856 0.178294
# Compute the adjusted factor values (the WLS fitted values) for a given date
def factor_cal(date, factor, factor_type):
    regression_factor_df = regression_factor_dict[date]
    if factor_type == 'value_industry':
        industry_list = list(get_industries(name='sw_l1').index)
        y = regression_factor_df['excess_return']
        weight = regression_factor_df['weight']
        X = pd.DataFrame()
        factor_list = [factor, 'value']
        for industry in industry_list:
            factor_list.append(industry)
        for x in factor_list:
            X[x] = regression_factor_df[x]
        mod_wls = sm.WLS(y, X, weights=weight)
        res_wls = mod_wls.fit()
        result = res_wls.fittedvalues
    elif factor_type == 'industry':
        industry_list = list(get_industries(name='sw_l1').index)
        y = regression_factor_df['excess_return']
        weight = regression_factor_df['weight']
        X = pd.DataFrame()
        factor_list = [factor]
        for industry in industry_list:
            factor_list.append(industry)
        for x in factor_list:
            X[x] = regression_factor_df[x]
        mod_wls = sm.WLS(y, X, weights=weight)
        res_wls = mod_wls.fit()
        result = res_wls.fittedvalues
    elif factor_type == 'value':
        y = regression_factor_df['excess_return']
        weight = regression_factor_df['weight']
        X = pd.DataFrame()
        factor_list = [factor, 'value']
        for x in factor_list:
            X[x] = regression_factor_df[x]
        mod_wls = sm.WLS(y, X, weights=weight)
        res_wls = mod_wls.fit()
        result = res_wls.fittedvalues
    elif factor_type == 'basic':
        y = regression_factor_df['excess_return']
        weight = regression_factor_df['weight']
        X = pd.DataFrame()
        factor_list = [factor]
        for x in factor_list:
            X[x] = regression_factor_df[x]
        mod_wls = sm.WLS(y, X, weights=weight)
        res_wls = mod_wls.fit()
        result = res_wls.fittedvalues
    return result

IC Analysis

The IC analysis shows that neutralizing the factor for market cap and industry can effectively improve its IC.
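
For reference, the IC reported below is computed in the next cell as the cross-sectional (Pearson) correlation between each month's factor values, raw or adjusted, and that month's forward excess return, and IC_IR is the mean IC divided by its standard deviation:

    \mathrm{IC}_t = \mathrm{corr}( f_{i,t}, r_{i,t\to t+1} - r_{m,t\to t+1} ),    \mathrm{IC\_IR} = \overline{\mathrm{IC}} / \mathrm{std}(\mathrm{IC})

where, for the adjusted variants, f_{i,t} is the WLS fitted value produced by factor_cal.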

# Compute monthly ICs for the raw and adjusted factors
ic_df = pd.DataFrame()
mark = 0
trade_list = list(regression_factor_dict.keys())
for d in trade_list[:-1]:
    regression_factor_df = regression_factor_dict[d]
    factor_list = []
    for item in ['basic', 'value', 'industry', 'value_industry']:
        for factor in ['DASTD_1m','DASTD_3m','DASTD_6m','DASTD_1y']:
            regression_factor_df[str(factor) + str('_') + str(item)] = factor_cal(d, factor, item)
            factor_list.append(str(factor) + str('_') + str(item))
    factor_list.append('excess_return')
    ic_df[d] = (regression_factor_df[factor_list]).corr().iloc[:-1,-1]
# Summary statistics of the monthly ICs of each factor variant
ic_fea_tab = pd.DataFrame()
ic_fea_tab['ic']=ic_df.mean(axis=1)
ic_fea_tab['std']=ic_df.std(axis=1)
ic_fea_tab['ic_ir']= ic_fea_tab['ic']/ic_fea_tab['std']
ic_fea_tab
ic std ic_ir
DASTD_1m_basic -0.000171 0.108760 -0.001568
DASTD_3m_basic -0.010774 0.100076 -0.107655
DASTD_6m_basic -0.011555 0.097721 -0.118246
DASTD_1y_basic -0.015416 0.116725 -0.132074
DASTD_1m_value -0.001801 0.102975 -0.017487
DASTD_3m_value -0.012579 0.093508 -0.134521
DASTD_6m_value -0.014409 0.087127 -0.165383
DASTD_1y_value -0.018219 0.106959 -0.170341
DASTD_1m_industry -0.001071 0.050091 -0.021373
DASTD_3m_industry -0.006109 0.050142 -0.121837
DASTD_6m_industry -0.006797 0.049684 -0.136802
DASTD_1y_industry -0.006867 0.052660 -0.130412
DASTD_1m_value_industry -0.000800 0.050413 -0.015875
DASTD_3m_value_industry -0.005639 0.050793 -0.111019
DASTD_6m_value_industry -0.006780 0.049709 -0.136392
DASTD_1y_value_industry -0.006488 0.053080 -0.122223
# Record the primary (unadjusted) factor table for each rebalancing date in a dict
real_factor_dict = {}
for date in date_list[:-1]:
    print('Computing data for {} ...'.format(str(date)[:7]))
    real_factor_df = get_primary_factors(date, date_list, '000985.XSHG')
    real_factor_dict[date] = real_factor_df
Computing data for 2011-08 ...
Computing data for 2011-09 ...
...
Computing data for 2018-12 ...
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-104-682e0d65ecdb> in <module>
----> 5     real_factor_df = get_primary_factors(date, date_list, '000985.XSHG')

<ipython-input-13-c051662a6376> in get_primary_factors(date, date_list, stock_index)
----> 5     value = value_cal(stock_list, date)

<ipython-input-7-cc44bf17741d> in value_cal(stock_list, date)
----> 7         circulating_market_cap = df.loc[0,'circulating_market_cap']

KeyError: 'the label [0] is not in the [index]'
# Save the computed primary factor tables
# Persist the dict to a file with the pickle module
import pickle
pkl_file = open('volatilty_real_factor.pkl', 'wb')
pickle.dump(real_factor_dict, pkl_file, 0)
pkl_file.close()
# Load the saved primary factor tables
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()

Grouped Backtests

As in the previous sections, we take the four lookback windows, each adjusted in the various ways described above, split the stocks into groups for every case, backtest each group, and plot the long-short return curves. The conclusion mirrors the regression results: the adjusted factors noticeably improve the strategy's returns.
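
As a sketch of what each cell below does (the group labels follow the code; which side ends up holding the low-volatility names depends on the sign of the fitted coefficients), stocks are re-ranked each month by the factor value, split into 5 equal-sized groups, and each group's equal-weighted daily returns are chained into a net-value curve; the long-short curve plotted after each grouping is

    \mathrm{NAV}_T = \prod_{t=1}^{T} ( 1 + r^{group0}_t - r^{group4}_t )

where group0 is the quintile ranked highest on the (adjusted) factor value and group4 the lowest.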

# Helper functions for the grouped backtests
def ret_se(start_date='2018-6-1',end_date='2018-7-1',stock_pool=None,weight=0):
    pool = stock_pool
    if len(pool) != 0:
        # Get the stocks' historical close prices
        df = get_price(list(pool),start_date=start_date,end_date=end_date,fields=['close']).close
        df = df.dropna(axis=1)
        # Get the log float market cap of each stock in the list
        df_mkt = get_fundamentals(query(valuation.code,valuation.circulating_market_cap).filter(valuation.code.in_(df.columns)))
        df_mkt.index = df_mkt['code'].values
        fact_se =pd.Series(df_mkt['circulating_market_cap'].values,index = df_mkt['code'].values)
        fact_se = np.log(fact_se)
    else:
        df = get_price('000001.XSHG',start_date=start_date,end_date=end_date,fields=['close'])
        df['v'] = [1]*len(df)
        del df['close']
    # Daily gross return (1 + percentage change vs. the previous day)
    pct = df.pct_change()+1
    pct.iloc[0,:] = 1
    if weight == 0:
        # Equal-weighted average return
        se = pct.cumsum(axis=1).iloc[:,-1]/pct.shape[1]
        return se
    else:
        # Market-cap-weighted average return
        se = (pct*fact_se).cumsum(axis=1).iloc[:,-1]/sum(fact_se)
        return se
    
# Get the return series of every group across all rebalancing periods
def get_all_pct(pool_dict,trade_list,groups=5):
    num = 1
    for s,e in zip(trade_list[:-1],trade_list[1:]):
        stock_list = pool_dict[s]
        stock_num = len(stock_list)//groups
        if num == 0:
            pct_se_list = []
            for i in range(groups):
                pct_se_list.append(ret_se(start_date=s,end_date=e,stock_pool=stock_list[i*stock_num:(i+1)*stock_num]))
            pct_df1 = pd.concat(pct_se_list,axis=1)
            pct_df = pd.concat([pct_df,pct_df1],axis=0)
        else:
            pct_se_list = []
            for i in range(groups):
                pct_se_list.append(ret_se(start_date=s,end_date=e,stock_pool=stock_list[i*stock_num:(i+1)*stock_num]))
            pct_df = pd.concat(pct_se_list,axis=1)    
            num = 0
    return pct_df
# Grouped backtest / 'DASTD_1m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1m']
for day in key_list:
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1m']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues 
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / 'DASTD_3m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_3m']
for day in key_list:
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_3m']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / 'DASTD_6m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_6m']
for day in key_list:
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_6m']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues 
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / 'DASTD_1y'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1y']
for day in key_list:
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1y']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues 
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap-neutral 'DASTD_1m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1m']
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1m', 'value']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap-neutral 'DASTD_3m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_3m']
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_3m', 'value']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap-neutral 'DASTD_6m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_6m']
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_6m', 'value']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap-neutral 'DASTD_1y'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1y']
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1y', 'value']
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / industry-neutral 'DASTD_1m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1m']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / industry-neutral 'DASTD_3m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_3m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_3m']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / industry-neutral 'DASTD_6m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_6m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_6m']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / industry-neutral 'DASTD_1y'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1y']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1y']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap- and industry-neutral 'DASTD_1m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1m', 'value']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap- and industry-neutral 'DASTD_3m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_3m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_3m', 'value']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap- and industry-neutral 'DASTD_6m'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_6m']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_6m', 'value']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))
# Grouped backtest / market-cap- and industry-neutral 'DASTD_1y'
pkl_file = open('volatilty_real_factor.pkl', 'rb')
real_factor_dict = pickle.load(pkl_file)
pkl_file.close()
group = 5 # number of groups
pool_dict = real_factor_dict
key_list = real_factor_dict.keys()
factor_df = pd.DataFrame()
factor_df[0] = pool_dict[list(key_list)[-1]]['DASTD_1y']
industry_list = list(get_industries(name='sw_l1').index)
for day in key_list:    
    regression_factor_df = regression_factor_dict[day]
    y = regression_factor_df['excess_return']
    weight = regression_factor_df['weight']
    X = pd.DataFrame()
    factor_list = ['DASTD_1y', 'value']
    for industry in industry_list:
        factor_list.append(industry)
    for x in factor_list:
        X[x] = regression_factor_df[x]
    mod_wls = sm.WLS(y, X, weights=weight)
    res_wls = mod_wls.fit()
    factor_df[day] = res_wls.fittedvalues    
factor_df = factor_df.T
factor_df = factor_df.drop([0])
for i in range(len(factor_df.index)):
    temp_se = factor_df.iloc[i,:].sort_values(ascending=False) # sort factor values from largest to smallest
    temp_se = temp_se.dropna() # drop missing values
    pool = temp_se.index # no special treatment of negative values
    pool_dict[factor_df.index[i]] = pool
trade_list = factor_df.index
group_pct = get_all_pct(pool_dict,trade_list,groups=group)
group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
group_pct.cumprod().plot(figsize=(12,6))
# Long-short portfolio return curve
se1 = group_pct['group0']-group_pct['group4']+1
df = pd.DataFrame(se1,columns=['long-short'])
df.cumprod().plot(figsize=(12,6))