请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  量化平台 帖子:3365770 新帖:2

标准因子库因子分析模板

我就是英雄发表于:7 月 25 日 20:29回复(1)

本模板主要用于对 Alpha101 和 Alpha191 两个标准因子库中的因子,以及使用 def 方式自定义的因子进行全信息分析。只需引入因子、设置测试时间和测试股票池,就能快速完成因子测试。本模板基于聚宽的因子研究模板,做了适当的修改,在此感谢聚宽。

标准因子库因子分析模板¶

# Import libraries
import warnings

from jqdata import *
import numpy as np
import pandas as pd
import jqfactor
from jqlib import alpha191
from jqlib.alpha191 import *
from jqfactor import get_factor_values
from jqfactor import Factor
from jqfactor import analyze_factor
warnings.filterwarnings('ignore') 
def get_tradeday_list(start, end, frequency=None, count=None):
    """Return the trading days in a date range, optionally sampled.

    The calendar is the index of the frame that ``get_price`` returns for
    '000001.XSHG'.

    Args:
        start: First date of the range. Ignored when ``count`` is given.
        end: Last date of the range.
        frequency: ``None`` or ``'day'`` returns every trading day.
            ``'month'`` keeps the first returned day of each calendar month,
            ``'quarter'`` the first returned day of Jan/Apr/Jul/Oct, and
            ``'halfyear'`` the first returned day of Jan/Jun.
        count: Number of rows to request ending at ``end``; takes precedence
            over ``start`` when not ``None``.

    Returns:
        The index of selected trading days.

    Raises:
        ValueError: If ``frequency`` is not one of the supported values
            (previously this case silently returned ``None``).
    """
    if count is not None:
        df = get_price('000001.XSHG', end_date=end, count=count)
    else:
        df = get_price('000001.XSHG', start_date=start, end_date=end)

    if frequency is None or frequency == 'day':
        return df.index

    # Months eligible for each sampling frequency; None means "every month".
    # NOTE(review): 'halfyear' uses January and June, which splits the year
    # 5/7 months; January and July may have been intended. Kept as-is so
    # existing results do not change -- confirm with the author.
    eligible_months = {
        'month': None,
        'quarter': ('01', '04', '07', '10'),
        'halfyear': ('01', '06'),
    }
    if frequency not in eligible_months:
        raise ValueError(
            "frequency must be None, 'day', 'month', 'quarter' or "
            "'halfyear', got %r" % (frequency,)
        )

    # Work on the index directly so the frame from get_price is not mutated.
    days = df.index
    months = eligible_months[frequency]
    if months is not None:
        # str(day)[5:7] is the two-digit month of the 'YYYY-MM-DD...' text.
        in_month = np.array([str(day)[5:7] in months for day in days],
                            dtype=bool)
        days = days[in_month]

    # Keep only the first day seen for each 'YYYY-MM' prefix.
    year_month = pd.Index([str(day)[0:7] for day in days])
    return days[~year_month.duplicated()]
# Analysis window: first and last date of the test range.
start, end = '2016-07-01', '2016-08-01'
# Rebalancing periods to evaluate.
periods = (5, 10, 20)
# Number of quantile groups the factor is split into.
quantiles = 5
# Every trading day between start and end.
date_list = get_tradeday_list(start, end)
date_list
DatetimeIndex(['2016-07-01', '2016-07-04', '2016-07-05', '2016-07-06',
               '2016-07-07', '2016-07-08', '2016-07-11', '2016-07-12',
               '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-18',
               '2016-07-19', '2016-07-20', '2016-07-21', '2016-07-22',
               '2016-07-25', '2016-07-26', '2016-07-27', '2016-07-28',
               '2016-07-29', '2016-08-01'],
              dtype='datetime64[ns]', freq=None)
 
# Compute the factor once per trading day, collecting one result per day in
# chronological order.
daily_factors = []
for d in date_list:
    # Constituents of '000905.XSHG' as of day d.
    pool = get_index_stocks('000905.XSHG', date=d)
    # alpha191.alpha_005 is a standard library factor; a user-defined factor
    # function can be substituted here.
    daily_factors.append(alpha191.alpha_005(pool, end_date=d))

# Concatenate once, labelling every column with the date it was computed for.
# The previous loop prepended each new day on the left and then overwrote the
# columns with the ascending date_list, which attached reversed (wrong) dates
# to the factor values.
if daily_factors:
    factor_df = pd.concat(daily_factors, axis=1, keys=date_list, sort=True)
else:
    # No trading days in range: keep an empty frame, as before.
    factor_df = pd.DataFrame()
factor_df.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
2016-07-01 00:00:00 2016-07-04 00:00:00 2016-07-05 00:00:00 2016-07-06 00:00:00 2016-07-07 00:00:00 2016-07-08 00:00:00 2016-07-11 00:00:00 2016-07-12 00:00:00 2016-07-13 00:00:00 2016-07-14 00:00:00 2016-07-15 00:00:00 2016-07-18 00:00:00 2016-07-19 00:00:00 2016-07-20 00:00:00 2016-07-21 00:00:00 2016-07-22 00:00:00 2016-07-25 00:00:00 2016-07-26 00:00:00 2016-07-27 00:00:00 2016-07-28 00:00:00 2016-07-29 00:00:00 2016-08-01 00:00:00
000006.XSHE -0.635001 -0.843543 -0.843543 -0.852803 -0.852803 -0.852803 -0.774597 -0.512272 -0.512272 -0.512272 -0.487122 -0.562122 -0.562122 -0.937614 -0.937614 -0.937614 -0.850420 -0.850420 -0.850420 -0.720093 -0.720093 -0.720093
000008.XSHE -0.408248 -0.408248 -0.281362 -0.740464 -0.988601 -0.988601 -0.988601 -0.811641 -0.329060 -0.329060 -0.124035 -0.177822 -0.428571 -0.428571 -0.684653 -0.684653 -0.748455 -0.798762 -0.798762 -0.798762 -0.791727 -0.791727
000012.XSHE -0.722222 -0.559451 -0.492763 -0.492763 -0.612372 -0.612372 -0.612372 -0.468807 -0.468807 -0.912871 -0.962533 -0.962533 -0.962533 -0.846114 -0.846114 -0.846114 -0.642857 -0.505650 -0.505650 -0.542326 -0.542326 -0.542326
000021.XSHE -0.962533 -0.962533 -0.962533 -0.906327 -0.906327 -0.807573 -0.751809 -0.751809 -0.751809 -0.748455 -0.943456 -0.943456 -0.943456 -0.938194 -0.906327 -0.848875 -0.848875 -0.871033 -0.899836 -0.924595 -1.000000 -1.000000
000028.XSHE -0.891042 -0.886621 -0.886621 -0.886621 -0.870388 -0.841625 -0.841625 -0.517857 -0.720577 -0.866025 -0.866025 -0.868599 -0.868599 -0.868599 -0.504184 -0.504184 -0.319438 -0.319438 -0.104257 -0.852941 -0.852941 -0.968822
# Data cleaning: standardize each date column, then neutralize it against
# liquidity.
from jqfactor import *
for date in date_list:
    # Optional step, not applied here: median winsorization, e.g.
    # winsorize_med(factor_df, scale=3, inclusive=True, inf2nan=True, axis=0)
    standardized = standardlize(factor_df[date], inf2nan=True)
    # Remove the part of the factor associated with liquidity on that date.
    factor_df[date] = neutralize(standardized, how=['liquidity'], date=date)
# Transpose so rows are dates and columns are securities, the layout used by
# the analysis step.
factor_df = factor_df.T
factor_df.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
000006.XSHE 000008.XSHE 000012.XSHE 000021.XSHE 000028.XSHE 000030.XSHE 000031.XSHE 000049.XSHE 000050.XSHE 000062.XSHE 000066.XSHE 000078.XSHE 000088.XSHE 000090.XSHE 000099.XSHE 000158.XSHE 000400.XSHE 000401.XSHE 000417.XSHE 000418.XSHE 000426.XSHE 000488.XSHE 000501.XSHE 000513.XSHE 000517.XSHE 000519.XSHE 000528.XSHE 000541.XSHE 000543.XSHE 000547.XSHE 000550.XSHE 000552.XSHE 000563.XSHE 000566.XSHE 000572.XSHE 000581.XSHE 000587.XSHE 000592.XSHE 000596.XSHE 000598.XSHE ... 601010.XSHG 601012.XSHG 601126.XSHG 601155.XSHG 601168.XSHG 601226.XSHG 601233.XSHG 601311.XSHG 601369.XSHG 601515.XSHG 601666.XSHG 601678.XSHG 601699.XSHG 601717.XSHG 601777.XSHG 601801.XSHG 601880.XSHG 601886.XSHG 601908.XSHG 601929.XSHG 601965.XSHG 603001.XSHG 603005.XSHG 603019.XSHG 603025.XSHG 603077.XSHG 603169.XSHG 603188.XSHG 603198.XSHG 603328.XSHG 603355.XSHG 603555.XSHG 603567.XSHG 603568.XSHG 603589.XSHG 603698.XSHG 603699.XSHG 603766.XSHG 603806.XSHG 603883.XSHG
2016-07-01 0.231763 0.945993 -0.090748 -0.897105 -0.567578 -0.802039 0.581632 -0.208363 0.274692 0.149607 -0.749742 -0.102788 -0.392699 0.559172 -0.776750 -0.236863 -0.730941 -0.744520 -0.439433 -0.307533 0.113132 -0.957738 -0.163030 0.246473 -0.909116 -0.171478 -0.646537 -0.846402 -0.258432 NaN 0.764034 -0.065410 -0.563251 -0.935460 0.459805 -0.350909 -0.239551 0.195170 0.215930 -0.423308 ... -0.214790 -0.155528 1.033451 3.721916 -1.015402 -0.768985 0.824647 0.780369 -0.111290 3.745133 -0.844951 -0.907128 -0.937447 0.103437 -0.543733 1.074766 -0.922364 -0.616630 -0.895976 -0.295999 0.108096 -0.521604 -0.597208 0.413012 0.200713 -0.703347 -0.040143 -0.129788 -0.959252 -0.820271 -0.648068 1.608188 0.552354 -0.035383 -0.509194 -0.893137 0.594446 -0.721119 -0.771872 -0.861156
2016-07-04 -0.406269 1.093013 0.555934 -0.895472 -0.498090 -0.762448 -0.734186 -0.163767 0.349778 0.243663 -0.723311 0.050897 -0.304575 -0.522543 0.082522 -0.180520 -0.564042 -0.663059 -0.380333 -0.243984 -0.164431 -0.958278 2.967263 0.352183 0.251559 -0.236034 -0.736321 -0.666628 -0.198734 NaN 0.930695 0.006849 -0.522523 -0.846381 1.662512 -0.588617 -0.892251 0.284100 0.319561 -0.597328 ... 0.188962 -0.094045 1.526310 1.123846 -1.020658 -0.579102 0.339443 0.167108 -0.502390 0.042842 -0.296736 -0.858885 -0.956424 0.636216 -0.522532 1.273013 -0.882375 -0.580846 -0.883974 -0.179414 0.205312 0.339244 -0.590995 -0.641782 0.261875 -0.669555 0.026954 0.076684 -0.994593 -0.966786 -0.647393 1.801072 0.640672 0.016811 -0.514169 -0.912633 0.557092 -0.900441 -0.787180 0.242480
2016-07-05 -0.401285 1.782362 0.949923 -0.931690 -0.517102 -0.945282 -0.757040 -0.100411 -0.102794 -0.123546 0.411409 0.612741 0.791016 -0.534879 0.164907 0.112037 -0.560812 -0.693272 -0.381712 -0.176549 -0.282514 -1.003120 0.312477 0.439985 0.006676 -0.665349 -0.786880 -0.895325 -0.108994 NaN 1.060398 -1.045359 0.177773 -0.886941 2.351317 -0.601182 -0.928323 0.382503 0.402740 -0.784371 ... 0.555670 -0.045963 1.733705 -0.612716 -1.071174 -0.584942 0.189264 0.255116 -0.549832 0.071842 -0.302113 -0.833555 -1.014960 0.917714 -0.504333 1.431436 -0.854660 -0.610674 -1.025960 -0.167934 0.974516 2.055944 -0.178631 -0.627450 1.529856 -0.653033 1.469174 0.245981 -0.498548 -0.979364 -0.627354 -0.537767 1.267578 1.008011 1.692023 -0.945552 0.682088 -0.944038 -0.824689 1.108833
2016-07-06 -0.417838 -0.025962 0.906350 -0.671749 -0.490890 -0.897741 -0.717348 -0.088532 -0.682076 -0.477116 0.564916 0.679696 0.755144 -0.505960 0.041761 -0.324875 -0.527374 -0.085151 -0.284333 0.115453 -0.283994 -0.530617 0.298159 0.421399 0.015292 -0.629631 -0.746540 -0.847280 -0.101762 NaN 1.568719 -0.990095 0.596334 -0.842405 2.964437 -0.568041 -0.884682 0.857151 0.724735 -0.744740 ... 0.307411 0.730455 2.558616 -1.069368 -1.016776 -0.040871 -0.862456 0.248211 -0.522810 0.069378 -0.286092 -0.724079 -0.965078 0.874580 1.659889 2.159834 -0.200260 -0.755986 -0.973226 -0.158353 -0.805162 3.013865 0.376503 -0.593616 0.906291 -0.878647 1.403338 0.003055 -0.473790 -0.923899 -0.593010 -0.849972 1.214303 0.966954 4.700672 -0.964850 0.653542 -0.894701 -0.777305 2.963095
2016-07-07 -0.500049 -0.877638 0.152898 -0.657806 -0.537376 -0.888251 -0.909078 0.787839 -0.764854 -0.803352 3.369584 2.045013 0.747095 -0.480296 -0.820760 -0.570956 -0.474312 0.779063 -0.419509 -0.121454 -0.574920 0.135247 -0.011712 0.496038 -0.442120 -0.606084 -0.725685 -0.784233 -0.822920 NaN 0.981749 -0.907433 2.377357 -0.750620 2.103462 -0.303399 -0.690922 -0.468237 0.797639 -0.752161 ... -0.042563 0.749059 2.065187 -0.923243 -0.912675 0.296944 -0.826247 0.741971 -0.630535 0.251184 -0.358099 -0.051157 -0.809692 0.853749 -0.366360 2.697316 -0.332747 -0.774432 -0.903411 0.331775 -0.801092 0.302056 2.042541 -0.330265 0.545391 -0.844454 2.429983 -0.377529 -0.371835 -0.571463 -0.564439 -0.814180 2.186676 3.152651 3.334748 -0.843972 1.467489 -0.467947 -0.871181 2.260305
# Run the single-factor analysis on the prepared factor values.
far = analyze_factor(
    factor=factor_df,
    start_date=date_list[0],
    end_date=date_list[-1],
    weight_method='avg',
    industry='jq_l1',
    quantiles=quantiles,
    periods=periods,
    max_loss=0.3,
)
# Print the information-coefficient (IC) summary table.
# NOTE(review): method='rank' presumably selects a rank-based IC -- confirm
# against the jqfactor documentation.
far.plot_information_table(group_adjust=False, method='rank')
IC 分析
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_5 period_10 period_20
IC Mean -0.012 -0.044 -0.045
IC Std. 0.115 0.136 0.085
IR -0.104 -0.327 -0.526
t-stat(IC) -0.487 -1.533 -2.468
p-value(IC) 0.632 0.140 0.022
IC Skew -1.769 -1.218 -0.286
IC Kurtosis 3.055 0.260 -1.396
# Plot the mean return of each factor quantile as a bar chart.
far.plot_quantile_returns_bar(by_group=False, demeaned=0, group_adjust=False)
<Figure size 432x288 with 0 Axes>
# Print the turnover tables (per-quantile mean turnover and factor rank
# autocorrelation).
far.plot_turnover_table()
换手率分析
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_10 period_20 period_5
Quantile 1 Mean Turnover 0.798 0.833 0.770
Quantile 2 Mean Turnover 0.773 0.859 0.768
Quantile 3 Mean Turnover 0.804 0.781 0.794
Quantile 4 Mean Turnover 0.804 0.780 0.799
Quantile 5 Mean Turnover 0.782 0.828 0.758
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_5 period_10 period_20
Mean Factor Rank Autocorrelation 0.071 0.009 -0.059
# Produce the full factor report: quantile statistics, returns, IC and
# turnover sections.
far.create_full_tear_sheet(
    demeaned=False,
    group_adjust=False,
    by_group=False,
    turnover_periods=None,
    avgretplot=(5, 15),
    std_bar=False,
)
分位数统计
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
min max mean std count count %
factor_quantile
1 -1.213127 -0.685984 -0.873548 0.089777 2125 20.066100
2 -0.806479 -0.451365 -0.640396 0.078383 2114 19.962229
3 -0.550986 -0.036495 -0.341379 0.115173 2114 19.962229
4 -0.219804 0.789970 0.165859 0.217510 2114 19.962229
5 0.459805 6.871189 1.623212 0.973111 2123 20.047214
-------------------------

收益分析
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_5 period_10 period_20
Ann. alpha -0.084 -0.104 -0.089
beta -0.042 -0.089 0.150
Mean Period Wise Return Top Quantile (bps) -5.976 -9.289 -3.702
Mean Period Wise Return Bottom Quantile (bps) -3.109 -3.872 1.348
Mean Period Wise Spread (bps) -2.805 -5.235 -5.119
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
-------------------------

IC 分析
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_5 period_10 period_20
IC Mean -0.012 -0.044 -0.045
IC Std. 0.115 0.136 0.085
IR -0.104 -0.327 -0.526
t-stat(IC) -0.487 -1.533 -2.468
p-value(IC) 0.632 0.140 0.022
IC Skew -1.769 -1.218 -0.286
IC Kurtosis 3.055 0.260 -1.396
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
-------------------------

换手率分析
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_10 period_20 period_5
Quantile 1 Mean Turnover 0.798 0.833 0.770
Quantile 2 Mean Turnover 0.773 0.859 0.768
Quantile 3 Mean Turnover 0.804 0.781 0.794
Quantile 4 Mean Turnover 0.804 0.780 0.799
Quantile 5 Mean Turnover 0.782 0.828 0.758
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
period_5 period_10 period_20
Mean Factor Rank Autocorrelation 0.071 0.009 -0.059
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
-------------------------

<Figure size 432x288 with 0 Axes>
# Compute the cumulative return of each quantile for the chosen rebalancing
# period (5 here, one of the values in `periods`).
df = far.calc_cumulative_return_by_quantile(period=5)
# Plot the result.
df.plot(figsize=(15,6))
<matplotlib.axes._subplots.AxesSubplot at 0x7fa95b7d57f0>
 

全部回复

0/140

量化课程

    移动端课程