from jqfactor import Factor, calc_factors
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Stock universe: the securities returned by get_index_stocks for the 000300.XSHG index.
stock = get_index_stocks('000300.XSHG')

class Hs300Alpha(Factor):
    """Excess return of each stock over the 000300.XSHG index across the data window.

    For every stock the factor value is the stock's return from the first to
    the last row of the close-price window, minus the index's return measured
    the same way.
    """

    # Factor name
    name = 'hs300_alpha'
    # Length of the data window requested for each calculation
    max_window = 10
    # Data fields this factor depends on
    dependencies = ['close']

    def calc(self, data):
        """Compute the factor values.

        Args:
            data: mapping that provides the stock close prices under the
                key ``'close'``; the first row is taken as the start of the
                window and the last row as its end.

        Returns:
            pandas.Series whose index is the stock code and whose value is
            the factor value (stock return minus index return).
        """
        # Stock close prices and their return from the first to the last row.
        stock_close = data['close']
        window_end = stock_close.iloc[-1, :]
        window_start = stock_close.iloc[0, :]
        stock_return = window_end / window_start - 1

        # Close prices of the index (CSI 300), fetched as extra data.
        benchmark = self._get_extra_data(securities=['000300.XSHG'], fields=['close'])
        benchmark_close = benchmark['close']
        benchmark_return = benchmark_close.iat[-1, 0] / benchmark_close.iat[0, 0] - 1

        # Alpha: the stock return in excess of the index return.
        return stock_return - benchmark_return
# Evaluate Hs300Alpha for the stocks in `stock` from 2017-01-01 to 2017-12-31.
# The result is later indexed by factor name ('hs300_alpha').
factors = calc_factors(stock, [Hs300Alpha()], start_date='2017-01-01', end_date='2017-12-31')

/opt/conda/envs/python3new/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

# Factor values for 'hs300_alpha'; the columns are matched against stock codes below.
data = factors['hs300_alpha']

from jqdata import *
# Industry codes of the 'sw_l1' classification.
sw = get_industries(name='sw_l1').index
# For each industry: average the factor over the industry's stocks that appear
# in `data` (mean per stock column first, then mean across those stocks).
a = []
for industry_code in sw:
    industry_members = set(get_industry_stocks(industry_code))
    covered = list(set(data.columns) & industry_members)
    a.append(data[covered].mean().mean())

# Bar chart of the per-industry mean factor values held in the list `a`.
from pyecharts import Bar
from pyecharts import online
online()
bar = Bar()
# Industry display names used as the x-axis labels.
# NOTE(review): presumably listed in the same order as the `sw` industry index — verify.
names=["农林牧渔","采掘","化工","钢铁",'有色金属','电子','家用电器','食品饮料','纺织服装','轻工制造',
'医药生物','公用事业','交通运输','房地产','商业贸易','休闲服务','综合','建筑材料','建筑装饰','电气设备',
'国防军工','计算机','传媒I','通信','银行','非银金融','汽车','机械设备']
bar.add("各行业因子值均值", names, a,
        is_more_utils=True)
# Bare expression; presumably renders the chart as the cell output in a notebook.
bar

# Build the industry dummy-variable matrix: one row per industry in `sw`,
# one column per stock in `data`; a cell is 1 when the stock belongs to the
# industry and 0 otherwise.
from jqdata import *
sw=get_industries(name='sw_l1').index
# Size the matrix from the industry list itself instead of a hard-coded 28:
# with a fixed row count, any extra industry would make `.loc` silently append
# a row filled with NaN for all non-member stocks.
industry = pd.DataFrame(0, columns=data.columns, index=range(len(sw)))
# The set of factor stocks does not change inside the loop, so build it once.
factor_stocks = set(data.columns)
for i, industry_code in enumerate(sw):
    # Stocks that are both in the factor data and in this industry.
    temp = list(factor_stocks.intersection(get_industry_stocks(industry_code)))
    industry.loc[i, temp] = 1

# Remove the market-cap and industry effects from the factor: for every date,
# regress the cross-section of factor values on standardized circulating
# market cap plus the industry dummies (no intercept) and keep the residuals
# as the new factor values.
residuals = {}
for i, date in enumerate(data.index):
    m = get_fundamentals(query(valuation.circulating_cap, valuation.code).filter(valuation.code.in_(data.columns)), date=date)
    # Re-index by stock code and keep the first queried column (circulating_cap).
    m.index = np.array(m['code'])
    m = m.iloc[:, 0]
    # Z-score the market cap. np.mean / np.std are spelled out because this
    # file never imports bare `mean` / `std`; np.std uses ddof=0 (population
    # standard deviation), the same as a pylab-style bare `std`.
    m = (m - np.mean(m)) / np.std(m)
    x = data.iloc[i, :]
    # Align factor, market cap and dummies on stock code. `m` is already
    # standardized here, so its mean is approximately 0; missing values in
    # every column are filled with that value.
    conc = pd.concat([x, m, industry.T], axis=1).fillna(np.mean(m))
    est = sm.OLS(conc.iloc[:, 0], conc.iloc[:, 1:]).fit()
    residuals[i] = est.resid
# Assemble all cross-sections in one step instead of inserting one column at
# a time into an empty frame, then put dates on the rows.
newx = pd.DataFrame(residuals).T
newx.index = data.index
# Drop the first date, as the original script does.
# NOTE(review): the reason is not visible in this file — confirm it is intended.
newx = newx.iloc[1:, :]

量化交易吧 / 量化平台 帖子：3370253 新帖：10

【笔记】单因子有效性分析（二）：行业市值中性化

外汇交易达人 发表于：5 月 10 日 07：10 回复(1)

1、为什么要做行业中性化

2、如何做行业市值中性化

全部回复

0/140

粉丝:565

帖子数:0

粉丝:686

帖子数:0

粉丝:676

帖子数:391

量化课程

热门标签

删除回复

确认要删除这篇文章么？

举报用户

信息提示

该文章已删除

设置置顶

完成设置【置顶】！

设置置顶

已取消设置【置顶】！

设置精华

完成设置【精华】！

设置精华

已取消设置【精华】！

审核信息

该文章已审核通过

审核信息

您已设置该文章审核不通过

举报成功

您已举报成功

用户登录

移动帖子

创建私信

屏蔽提示

确认要屏蔽该用户么？

屏蔽回复

您已对该用户实现屏蔽

信息回复

已发送成功

量化交易吧 / 量化平台 帖子：3370253 新帖：10